Diffstat (limited to 'lib/Target')
190 files changed, 12580 insertions, 10793 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index ae7ae59c9262..14825a785649 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -90,10 +90,6 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { } } -/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model -/// operations involving sub-registers. -bool ModelWithRegSequence(); - FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index e68354a42f8d..d316b13e0488 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -520,6 +520,70 @@ namespace ARM_AM { // This is stored in two operands [regaddr, align]. The first is the // address register. The second operand is the value of the alignment // specifier to use or zero if no explicit alignment. + // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific + // instruction. + + //===--------------------------------------------------------------------===// + // NEON Modified Immediates + //===--------------------------------------------------------------------===// + // + // Several NEON instructions (e.g., VMOV) take a "modified immediate" + // vector operand, where a small immediate encoded in the instruction + // specifies a full NEON vector value. These modified immediates are + // represented here as encoded integers. The low 8 bits hold the immediate + // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold + // the "Cmode" field of the instruction. The interfaces below treat the + // Op and Cmode values as a single 5-bit value. + + static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) { + return (OpCmode << 8) | Val; + } + static inline unsigned getNEONModImmOpCmode(unsigned ModImm) { + return (ModImm >> 8) & 0x1f; + } + static inline unsigned getNEONModImmVal(unsigned ModImm) { + return ModImm & 0xff; + } + + /// decodeNEONModImm - Decode a NEON modified immediate value into the + /// element value and the element size in bits. (If the element size is + /// smaller than the vector, it is splatted into all the elements.) 
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) { + unsigned OpCmode = getNEONModImmOpCmode(ModImm); + unsigned Imm8 = getNEONModImmVal(ModImm); + uint64_t Val = 0; + + if (OpCmode == 0xe) { + // 8-bit vector elements + Val = Imm8; + EltBits = 8; + } else if ((OpCmode & 0xc) == 0x8) { + // 16-bit vector elements + unsigned ByteNum = (OpCmode & 0x6) >> 1; + Val = Imm8 << (8 * ByteNum); + EltBits = 16; + } else if ((OpCmode & 0x8) == 0) { + // 32-bit vector elements, zero with one byte set + unsigned ByteNum = (OpCmode & 0x6) >> 1; + Val = Imm8 << (8 * ByteNum); + EltBits = 32; + } else if ((OpCmode & 0xe) == 0xc) { + // 32-bit vector elements, one byte with low bits set + unsigned ByteNum = 1 + (OpCmode & 0x1); + Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum))); + EltBits = 32; + } else if (OpCmode == 0x1e) { + // 64-bit vector elements + for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) { + if ((ModImm >> ByteNum) & 1) + Val |= (uint64_t)0xff << (8 * ByteNum); + } + EltBits = 64; + } else { + assert(false && "Unsupported NEON immediate"); + } + return Val; + } } // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2528854133e5..49c16f3e0720 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -56,7 +56,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, MachineInstr *MI = MBBI; MachineFunction &MF = *MI->getParent()->getParent(); - unsigned TSFlags = MI->getDesc().TSFlags; + uint64_t TSFlags = MI->getDesc().TSFlags; bool isPre = false; switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) { default: return NULL; @@ -199,9 +199,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, bool ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { + MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -227,8 +227,9 @@ ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Insert the spill to the stack frame. The register is killed at the spill // + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); storeRegToStackSlot(MBB, MI, Reg, isKill, - CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI); + CSI[i].getFrameIdx(), RC, TRI); } return true; } @@ -347,10 +348,8 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; - + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>(); int BOpc = !AFI->isThumbFunction() ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB); @@ -364,17 +363,17 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (FBB == 0) { if (Cond.empty()) // Unconditional branch? 
- BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB); + BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB); else - BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); return 1; } // Two-way conditional branch. - BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); - BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB); + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); return 2; } @@ -487,7 +486,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // Basic size info comes from the TSFlags field. const TargetInstrDesc &TID = MI->getDesc(); - unsigned TSFlags = TID.TSFlags; + uint64_t TSFlags = TID.TSFlags; unsigned Opc = MI->getOpcode(); switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { @@ -524,11 +523,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 10; case ARM::Int_eh_sjlj_setjmp: case ARM::Int_eh_sjlj_setjmp_nofp: - return 24; + return 20; case ARM::tInt_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: - return 14; + return 12; case ARM::BR_JTr: case ARM::BR_JTm: case ARM::BR_JTadd: @@ -595,6 +594,7 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, return true; } case ARM::MOVr: + case ARM::MOVr_TC: case ARM::tMOVr: case ARM::tMOVgpr2tgpr: case ARM::tMOVtgpr2gpr: @@ -693,75 +693,44 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -bool -ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. - if (DestRC == ARM::tGPRRegisterClass) - DestRC = ARM::GPRRegisterClass; - if (SrcRC == ARM::tGPRRegisterClass) - SrcRC = ARM::GPRRegisterClass; - - // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. - if (DestRC == ARM::DPR_8RegisterClass) - DestRC = ARM::DPR_VFP2RegisterClass; - if (SrcRC == ARM::DPR_8RegisterClass) - SrcRC = ARM::DPR_VFP2RegisterClass; - - // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. - if (DestRC == ARM::QPR_VFP2RegisterClass || - DestRC == ARM::QPR_8RegisterClass) - DestRC = ARM::QPRRegisterClass; - if (SrcRC == ARM::QPR_VFP2RegisterClass || - SrcRC == ARM::QPR_8RegisterClass) - SrcRC = ARM::QPRRegisterClass; - - // Allow QQPR / QQPR_VFP2 cross-class copies. - if (DestRC == ARM::QQPR_VFP2RegisterClass) - DestRC = ARM::QQPRRegisterClass; - if (SrcRC == ARM::QQPR_VFP2RegisterClass) - SrcRC = ARM::QQPRRegisterClass; - - // Disallow copies of unequal sizes. - if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize()) - return false; - - if (DestRC == ARM::GPRRegisterClass) { - if (SrcRC == ARM::SPRRegisterClass) - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg) - .addReg(SrcReg)); - else - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), - DestReg).addReg(SrcReg))); - } else { - unsigned Opc; - - if (DestRC == ARM::SPRRegisterClass) - Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS); - else if (DestRC == ARM::DPRRegisterClass) - Opc = ARM::VMOVD; - else if (DestRC == ARM::DPR_VFP2RegisterClass || - SrcRC == ARM::DPR_VFP2RegisterClass) - // Always use neon reg-reg move if source or dest is NEON-only regclass. 
- Opc = ARM::VMOVDneon; - else if (DestRC == ARM::QPRRegisterClass) - Opc = ARM::VMOVQ; - else if (DestRC == ARM::QQPRRegisterClass) - Opc = ARM::VMOVQQ; - else if (DestRC == ARM::QQQQPRRegisterClass) - Opc = ARM::VMOVQQQQ; - else - return false; - - AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg)); +void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool GPRDest = ARM::GPRRegClass.contains(DestReg); + bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); + + if (GPRDest && GPRSrc) { + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)))); + return; } - return true; + bool SPRDest = ARM::SPRRegClass.contains(DestReg); + bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); + + unsigned Opc; + if (SPRDest && SPRSrc) + Opc = ARM::VMOVS; + else if (GPRDest && SPRSrc) + Opc = ARM::VMOVRS; + else if (SPRDest && GPRSrc) + Opc = ARM::VMOVSR; + else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD; + else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVQ; + else if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVQQ; + else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVQQQQ; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); + MIB.addReg(SrcReg, getKillRegState(KillSrc)); + if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ) + AddDefaultPred(MIB); } static const @@ -795,30 +764,34 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass) + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) RC = ARM::GPRRegisterClass; - if (RC == ARM::GPRRegisterClass) { + switch (RC->getID()) { + case ARM::GPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { + break; + case ARM::SPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::DPRRegisterClass || - RC == ARM::DPR_VFP2RegisterClass || - RC == ARM::DPR_8RegisterClass) { + break; + case ARM::DPRRegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::DPR_8RegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass || - RC == ARM::QPR_8RegisterClass) { + break; + case ARM::QPRRegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::QPR_8RegClassID: // FIXME: Neon instructions should support predicates if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q)) - .addFrameIndex(FI).addImm(128) + .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); } else { @@ -828,12 +801,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } - } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + break; + case ARM::QQPRRegClassID: + case ARM::QQPR_VFP2RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32)) - .addFrameIndex(FI).addImm(128); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1d64Q)) + .addFrameIndex(FI).addImm(16); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); @@ -850,8 +825,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); } - } else { - assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); + break; + case ARM::QQQQPRRegClassID: { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) .addFrameIndex(FI) @@ -865,6 +840,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); + break; + } + default: + llvm_unreachable("Unknown regclass!"); } } @@ -886,26 +865,30 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass) + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) RC = ARM::GPRRegisterClass; - if (RC == ARM::GPRRegisterClass) { + switch (RC->getID()) { + case ARM::GPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { + break; + case ARM::SPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::DPRRegisterClass || - RC == ARM::DPR_VFP2RegisterClass || - RC == ARM::DPR_8RegisterClass) { + break; + case ARM::DPRRegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::DPR_8RegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass || - RC == ARM::QPR_8RegisterClass) { + break; + case ARM::QPRRegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::QPR_8RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg) - .addFrameIndex(FI).addImm(128) + .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg) @@ -913,14 +896,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } - } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + break; + case ARM::QQPRRegClassID: + case ARM::QQPR_VFP2RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32)); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1d64Q)); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO)); + AddDefaultPred(MIB.addFrameIndex(FI).addImm(16).addMemOperand(MMO)); } else { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) @@ -932,21 +917,25 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); } - } else { - assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) - .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) - .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); - AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + break; + case ARM::QQQQPRRegClassID: { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + 
.addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); + AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + break; + } + default: + llvm_unreachable("Unknown regclass!"); } } @@ -960,223 +949,6 @@ ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, return &*MIB; } -MachineInstr *ARMBaseInstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = NULL; - if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { - // If it is updating CPSR, then it cannot be folded. - if (MI->getOperand(4).getReg() == ARM::CPSR && !MI->getOperand(4).isDead()) - return NULL; - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - if (Opc == ARM::MOVr) - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); - else // ARM::t2MOVr - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - if (Opc == ARM::MOVr) - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), DstSubReg) - .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); - else // ARM::t2MOVr - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - } else if (Opc == ARM::tMOVgpr2gpr || - Opc == ARM::tMOVtgpr2gpr || - Opc == ARM::tMOVgpr2tgpr) { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) - 
.addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); - } - } else if (Opc == ARM::VMOVS) { - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI) - .addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) { - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - } else if (Opc == ARM::VMOVQ) { - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - if (MFI.getObjectAlignment(FI) >= 16 && - getRegisterInfo().canRealignStack(MF)) { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q)) - .addFrameIndex(FI).addImm(128) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addImm(Pred).addReg(PredReg); - } else { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) - .addImm(Pred).addReg(PredReg); - } - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - if (MFI.getObjectAlignment(FI) >= 16 && - getRegisterInfo().canRealignStack(MF)) { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q)) - 
.addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg); - } else { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) - .addImm(Pred).addReg(PredReg); - } - } - } - - return NewMI; -} - -MachineInstr* -ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - // FIXME - return 0; -} - -bool -ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { - if (Ops.size() != 1) return false; - - unsigned Opc = MI->getOpcode(); - if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { - // If it is updating CPSR, then it cannot be folded. - return MI->getOperand(4).getReg() != ARM::CPSR || - MI->getOperand(4).isDead(); - } else if (Opc == ARM::tMOVgpr2gpr || - Opc == ARM::tMOVtgpr2gpr || - Opc == ARM::tMOVgpr2tgpr) { - return true; - } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD || - Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { - return true; - } - - // FIXME: VMOVQQ and VMOVQQQQ? - - return false; -} - /// Create a copy of a const pool value. Update CPI to the new index and return /// the label UID. static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { @@ -1211,17 +983,12 @@ reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { - if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { - DestReg = TRI->getSubReg(DestReg, SubIdx); - SubIdx = 0; - } - + const TargetRegisterInfo &TRI) const { unsigned Opcode = Orig->getOpcode(); switch (Opcode) { default: { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); + MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); MBB.insert(I, MI); break; } @@ -1237,9 +1004,6 @@ reMaterialize(MachineBasicBlock &MBB, break; } } - - MachineInstr *NewMI = prior(I); - NewMI->getOperand(0).setSubReg(SubIdx); } MachineInstr * @@ -1291,6 +1055,165 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } +/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to +/// determine if two loads are loading from the same base address. It should +/// only return true if the base pointers are the same and the only differences +/// between the two addresses is the offset. It also returns the offsets by +/// reference. +bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, + int64_t &Offset2) const { + // Don't worry about Thumb: just ARM and Thumb2. 
+ if (Subtarget.isThumb1Only()) return false; + + if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) + return false; + + switch (Load1->getMachineOpcode()) { + default: + return false; + case ARM::LDR: + case ARM::LDRB: + case ARM::LDRD: + case ARM::LDRH: + case ARM::LDRSB: + case ARM::LDRSH: + case ARM::VLDRD: + case ARM::VLDRS: + case ARM::t2LDRi8: + case ARM::t2LDRDi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRi12: + case ARM::t2LDRSHi12: + break; + } + + switch (Load2->getMachineOpcode()) { + default: + return false; + case ARM::LDR: + case ARM::LDRB: + case ARM::LDRD: + case ARM::LDRH: + case ARM::LDRSB: + case ARM::LDRSH: + case ARM::VLDRD: + case ARM::VLDRS: + case ARM::t2LDRi8: + case ARM::t2LDRDi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRi12: + case ARM::t2LDRSHi12: + break; + } + + // Check if base addresses and chain operands match. + if (Load1->getOperand(0) != Load2->getOperand(0) || + Load1->getOperand(4) != Load2->getOperand(4)) + return false; + + // Index should be Reg0. + if (Load1->getOperand(3) != Load2->getOperand(3)) + return false; + + // Determine the offsets. + if (isa<ConstantSDNode>(Load1->getOperand(1)) && + isa<ConstantSDNode>(Load2->getOperand(1))) { + Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue(); + Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue(); + return true; + } + + return false; +} + +/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to +/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should +/// be scheduled together. On some targets if two loads are loading from +/// addresses in the same cache line, it's better if they are scheduled +/// together. This function takes two integers that represent the load offsets +/// from the common base address. It returns true if it decides it's desirable +/// to schedule the two loads together. "NumLoads" is the number of loads that +/// have already been scheduled after Load1. +bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + // Don't worry about Thumb: just ARM and Thumb2. + if (Subtarget.isThumb1Only()) return false; + + assert(Offset2 > Offset1); + + if ((Offset2 - Offset1) / 8 > 64) + return false; + + if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) + return false; // FIXME: overly conservative? + + // Four loads in a row should be sufficient. + if (NumLoads >= 3) + return false; + + return true; +} + +bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // Debug info is never a scheduling boundary. It's necessary to be explicit + // due to the special treatment of IT instructions below, otherwise a + // dbg_value followed by an IT will result in the IT instruction being + // considered a scheduling hazard, which is wrong. It should be the actual + // instruction preceding the dbg_value instruction(s), just like it is + // when debug info is not present. + if (MI->isDebugValue()) + return false; + + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isLabel()) + return true; + + // Treat the start of the IT block as a scheduling boundary, but schedule + // t2IT along with all instructions following it. + // FIXME: This is a big hammer.
But the alternative is to add all potential + true and anti dependencies to IT block instructions as implicit operands + to the t2IT instruction. The added compile time and complexity does not + seem worth it. + MachineBasicBlock::const_iterator I = MI; + // Make sure to skip any dbg_value instructions + while (++I != MBB->end() && I->isDebugValue()) + ; + if (I != MBB->end() && I->getOpcode() == ARM::t2IT) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + if (MI->definesRegister(ARM::SP)) + return true; + + return false; +} + +bool ARMBaseInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const { + if (!NumInstrs) + return false; + if (Subtarget.getCPUString() == "generic") + // Generic (and overly aggressive) if-conversion limits for testing. + return NumInstrs <= 10; + else if (Subtarget.hasV7Ops()) + return NumInstrs <= 3; + return NumInstrs <= 2; +} + +bool ARMBaseInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, + MachineBasicBlock &FMBB, unsigned NumF) const { + return NumT && NumF && NumT <= 2 && NumF <= 2; +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b566271c8db9..89a2db74a75e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -116,11 +116,25 @@ namespace ARMII { // Thumb format ThumbFrm = 24 << FormShift, - // NEON format - NEONFrm = 25 << FormShift, - NEONGetLnFrm = 26 << FormShift, - NEONSetLnFrm = 27 << FormShift, - NEONDupFrm = 28 << FormShift, + // Miscellaneous format + MiscFrm = 25 << FormShift, + + // NEON formats + NGetLnFrm = 26 << FormShift, + NSetLnFrm = 27 << FormShift, + NDupFrm = 28 << FormShift, + NLdStFrm = 29 << FormShift, + N1RegModImmFrm= 30 << FormShift, + N2RegFrm = 31 << FormShift, + NVCVTFrm = 32 << FormShift, + NVDupLnFrm = 33 << FormShift, + N2RegVShLFrm = 34 << FormShift, + N2RegVShRFrm = 35 << FormShift, + N3RegFrm = 36 << FormShift, + N3RegVShFrm = 37 << FormShift, + NVExtFrm = 38 << FormShift, + NVMulSLFrm = 39 << FormShift, + NVTBLFrm = 40 << FormShift, //===------------------------------------------------------------------===// // Misc flags.
@@ -213,7 +227,8 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; @@ -258,12 +273,10 @@ public: virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -283,29 +296,51 @@ public: const MDNode *MDPtr, DebugLoc DL) const; - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const; - virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo &TRI) const; MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1) const; + + /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to + /// determine if two loads are loading from the same base address. It should + /// only return true if the base pointers are the same and the only + /// differences between the two addresses is the offset. It also returns the + /// offsets by reference. + virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, int64_t &Offset2)const; + + /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should + /// be scheduled together. On some targets if two loads are loading from + /// addresses in the same cache line, it's better if they are scheduled + /// together. This function takes two integers that represent the load offsets + /// from the common base address. It returns true if it decides it's desirable + /// to schedule the two loads together. "NumLoads" is the number of loads that + /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const; + + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT, + MachineBasicBlock &FMBB,unsigned NumF) const; + + virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs) const { + return NumInstrs && NumInstrs == 1; + } }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 82458d2347cc..182bd9937145 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -170,56 +170,6 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs; } -const TargetRegisterClass* const * -ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = { - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass, - &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = { - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={ - &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass, - &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - if (STI.isThumb1Only()) { - return STI.isTargetDarwin() - ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses; - } - return STI.isTargetDarwin() - ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses; -} - BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { // FIXME: avoid re-calculating this everytime. 
@@ -352,7 +302,7 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, } bool -ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC, +ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, SmallVectorImpl<unsigned> &SubIndices, unsigned &NewSubIdx) const { @@ -724,6 +674,15 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { I != E; ++I) { for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { if (!I->getOperand(i).isFI()) continue; + + // When using ADDri to get the address of a stack object, 255 is the + // largest offset guaranteed to fit in the immediate offset. + if (I->getOpcode() == ARM::ADDri) { + Limit = std::min(Limit, (1U << 8) - 1); + break; + } + + // Otherwise check the addressing mode. switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: @@ -765,6 +724,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector<unsigned, 4> UnspilledCS1GPRs; SmallVector<unsigned, 4> UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + MachineFrameInfo *MFI = MF.getFrameInfo(); // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. @@ -780,7 +740,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. const unsigned *CSRegs = getCalleeSavedRegs(); - const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses(); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; @@ -798,50 +757,50 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } - if (CSRegClasses[i] == ARM::GPRRegisterClass || - CSRegClasses[i] == ARM::tGPRRegisterClass) { - if (Spilled) { - NumGPRSpills++; + if (!ARM::GPRRegisterClass->contains(Reg)) + continue; - if (!STI.isTargetDarwin()) { - if (Reg == ARM::LR) - LRSpilled = true; - CS1Spilled = true; - continue; - } + if (Spilled) { + NumGPRSpills++; - // Keep track if LR and any of R4, R5, R6, and R7 is spilled. - switch (Reg) { - case ARM::LR: + if (!STI.isTargetDarwin()) { + if (Reg == ARM::LR) LRSpilled = true; - // Fallthrough - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - CS1Spilled = true; - break; - default: - break; - } - } else { - if (!STI.isTargetDarwin()) { - UnspilledCS1GPRs.push_back(Reg); - continue; - } + CS1Spilled = true; + continue; + } - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - UnspilledCS1GPRs.push_back(Reg); - break; - default: - UnspilledCS2GPRs.push_back(Reg); - break; - } + // Keep track if LR and any of R4, R5, R6, and R7 is spilled. 
+ switch (Reg) { + case ARM::LR: + LRSpilled = true; + // Fallthrough + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + CS1Spilled = true; + break; + default: + break; + } + } else { + if (!STI.isTargetDarwin()) { + UnspilledCS1GPRs.push_back(Reg); + continue; + } + + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + UnspilledCS1GPRs.push_back(Reg); + break; + default: + UnspilledCS2GPRs.push_back(Reg); + break; } } } @@ -862,9 +821,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // offset, make sure a register (or a spill slot) is available for the // register scavenger. Note that if we're indexing off the frame pointer, the // effective stack size is 4 bytes larger since the FP points to the stack - // slot of the previous FP. - bool BigStack = RS && - estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF); + // slot of the previous FP. Also, if we have variable sized objects in the + // function, stack slot references will often be negative, and some of + // our instructions are positive-offset only, so conservatively consider + // that case to want a spill slot (or register) as well. + // FIXME: We could add logic to be more precise about negative offsets + // and which instructions will need a scratch register for them. Is it + // worth the effort and added fragility? + bool BigStack = + (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >= + estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects(); bool ExtraCSSpill = false; if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { @@ -957,7 +923,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. const TargetRegisterClass *RC = ARM::GPRRegisterClass; - MachineFrameInfo *MFI = MF.getFrameInfo(); RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); @@ -1622,6 +1587,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = prior(MBB.end()); assert(MBBI->getDesc().isReturn() && "Can only insert epilog into returning blocks"); + unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -1696,6 +1662,39 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size()); } + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || + RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { + // Tail call return: adjust the stack pointer and jump to callee. + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + + // Jump to label or value in register. + if (RetOpcode == ARM::TCRETURNdi) { + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else if (RetOpcode == ARM::TCRETURNdiND) { + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else if (RetOpcode == ARM::TCRETURNri) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). 
+ addReg(JumpTarget.getReg(), RegState::Kill); + } else if (RetOpcode == ARM::TCRETURNriND) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). + addReg(JumpTarget.getReg(), RegState::Kill); + } + + MachineInstr *NewMI = prior(MBBI); + for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); + + // Delete the pseudo instruction TCRETURN. + MBB.erase(MBBI); + } + if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 2c9c82d0318b..f7ee0d5cc66d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -69,9 +69,6 @@ public: /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; /// getMatchingSuperRegClass - Return a subclass of the specified register @@ -81,14 +78,15 @@ public: getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; - /// canCombinedSubRegIndex - Given a register class and a list of sub-register - /// indices, return true if it's possible to combine the sub-register indices - /// into one that corresponds to a larger sub-register. Return the new sub- - /// register index by reference. Note the new index by be zero if the given - /// sub-registers combined to form the whole register. - virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC, - SmallVectorImpl<unsigned> &SubIndices, - unsigned &NewSubIdx) const; + /// canCombineSubRegIndices - Given a register class and a list of + /// subregister indices, return true if it's possible to combine the + /// subregister indices into one that corresponds to a larger + /// subregister. Return the new subregister index by reference. Note the + /// new index may be zero if the given subregisters can be combined to + /// form the whole register. 
+ virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC, + SmallVectorImpl<unsigned> &SubIndices, + unsigned &NewSubIdx) const; const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; @@ -150,8 +148,8 @@ public: virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, FrameIndexValue *Value = NULL, diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index f2730fc8a5cb..7895cb071922 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -55,6 +55,7 @@ namespace { const std::vector<MachineConstantPoolEntry> *MCPEs; const std::vector<MachineJumpTableEntry> *MJTEs; bool IsPIC; + bool IsThumb; void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfo>(); @@ -67,8 +68,8 @@ namespace { : MachineFunctionPass(&ID), JTI(0), II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for @@ -139,6 +140,12 @@ namespace { void emitMiscInstruction(const MachineInstr &MI); + void emitNEONLaneInstruction(const MachineInstr &MI); + void emitNEONDupInstruction(const MachineInstr &MI); + void emitNEON1RegModImmInstruction(const MachineInstr &MI); + void emitNEON2RegInstruction(const MachineInstr &MI); + void emitNEON3RegInstruction(const MachineInstr &MI); + /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO); @@ -147,7 +154,8 @@ namespace { } /// getMovi32Value - Return binary encoding of operand for movw/movt. If the - /// machine operand requires relocation, record the relocation and return zero. + /// machine operand requires relocation, record the relocation and return + /// zero. unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, unsigned Reloc); unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx, @@ -193,6 +201,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { MJTEs = 0; if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; + IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction(); JTI->Initialize(MF, IsPIC); MMI = &getAnalysis<MachineModuleInfo>(); MCE.setModuleInfo(MMI); @@ -347,7 +356,7 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { MCE.processDebugLoc(MI.getDebugLoc(), true); - NumEmitted++; // Keep track of the # of mi's emitted + ++NumEmitted; // Keep track of the # of mi's emitted switch (MI.getDesc().TSFlags & ARMII::FormMask) { default: { llvm_unreachable("Unhandled instruction encoding format!"); @@ -407,6 +416,23 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { case ARMII::VFPMiscFrm: emitMiscInstruction(MI); break; + // NEON instructions. 
+ case ARMII::NGetLnFrm: + case ARMII::NSetLnFrm: + emitNEONLaneInstruction(MI); + break; + case ARMII::NDupFrm: + emitNEONDupInstruction(MI); + break; + case ARMII::N1RegModImmFrm: + emitNEON1RegModImmInstruction(MI); + break; + case ARMII::N2RegFrm: + emitNEON2RegInstruction(MI); + break; + case ARMII::N3RegFrm: + emitNEON3RegInstruction(MI); + break; } MCE.processDebugLoc(MI.getDebugLoc(), false); } @@ -1539,4 +1565,144 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { emitWordLE(Binary); } +static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegD = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegD = ARMRegisterInfo::getRegisterNumbering(RegD); + Binary |= (RegD & 0xf) << ARMII::RegRdShift; + Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; + return Binary; +} + +static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegN = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegN = ARMRegisterInfo::getRegisterNumbering(RegN); + Binary |= (RegN & 0xf) << ARMII::RegRnShift; + Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; + return Binary; +} + +static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegM = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegM = ARMRegisterInfo::getRegisterNumbering(RegM); + Binary |= (RegM & 0xf); + Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; + return Binary; +} + +/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON +/// data-processing instruction to the corresponding Thumb encoding. +static unsigned convertNEONDataProcToThumb(unsigned Binary) { + assert((Binary & 0xfe000000) == 0xf2000000 && + "not an ARM NEON data-processing instruction"); + unsigned UBit = (Binary >> 24) & 1; + return 0xef000000 | (UBit << 28) | (Binary & 0xffffff); +} + +void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; + const TargetInstrDesc &TID = MI.getDesc(); + if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { + RegTOpIdx = 0; + RegNOpIdx = 1; + LnOpIdx = 2; + } else { // ARMII::NSetLnFrm + RegTOpIdx = 2; + RegNOpIdx = 0; + LnOpIdx = 3; + } + + // Set the conditional execution predicate + Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); + RegT = ARMRegisterInfo::getRegisterNumbering(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, RegNOpIdx); + + unsigned LaneShift; + if ((Binary & (1 << 22)) != 0) + LaneShift = 0; // 8-bit elements + else if ((Binary & (1 << 5)) != 0) + LaneShift = 1; // 16-bit elements + else + LaneShift = 2; // 32-bit elements + + unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift; + unsigned Opc1 = Lane >> 2; + unsigned Opc2 = Lane & 3; + assert((Opc1 & 3) == 0 && "out-of-range lane number operand"); + Binary |= (Opc1 << 21); + Binary |= (Opc2 << 5); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= (IsThumb ? 
ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(1).getReg(); + RegT = ARMRegisterInfo::getRegisterNumbering(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, 0); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd. + Binary |= encodeNEONRd(MI, 0); + // Immediate fields: Op, Cmode, I, Imm3, Imm4 + unsigned Imm = MI.getOperand(1).getImm(); + unsigned Op = (Imm >> 12) & 1; + unsigned Cmode = (Imm >> 8) & 0xf; + unsigned I = (Imm >> 7) & 1; + unsigned Imm3 = (Imm >> 4) & 0x7; + unsigned Imm4 = Imm & 0xf; + Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source register in Dm. + unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VDUPfdf or VDUPfqf. + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source registers in Dn and Dm. + unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRn(MI, OpIdx++); + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VMOVDneon or VMOVQ. + emitWordLE(Binary); +} + #include "ARMGenCodeEmitter.inc" diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 13d8b74014c5..65a3da6f1617 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -337,7 +337,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (CPChange && ++NoCPIters > 30) llvm_unreachable("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); - + // Clear NewWaterList now. If we split a block for branches, it should // appear as "new water" for the next iteration of constant pool placement. NewWaterList.clear(); @@ -361,8 +361,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // After a while, this might be made debug-only, but it is not expensive. verify(MF); - // If LR has been forced spilled and no far jumps (i.e. BL) has been issued. - // Undo the spill / restore of LR if possible. + // If LR has been forced spilled and no far jump (i.e. BL) has been issued, + // undo the spill / restore of LR if possible. 
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) MadeChange |= UndoLRSpillRestore(); @@ -407,7 +407,7 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, std::vector<CPEntry> CPEs; CPEs.push_back(CPEntry(CPEMI, i)); CPEntries.push_back(CPEs); - NumCPEs++; + ++NumCPEs; DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i << "\n"); } @@ -418,7 +418,8 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, static bool BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; - if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. + // Can't fall off end of function. + if (llvm::next(MBBI) == MBB->getParent()->end()) return false; MachineBasicBlock *NextBB = llvm::next(MBBI); @@ -491,6 +492,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, unsigned MBBSize = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { + if (I->isDebugValue()) + continue; // Add instruction size to MBBSize. MBBSize += TII->GetInstSizeInBytes(I); @@ -722,7 +725,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // correspond to anything in the source. unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B; BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); - NumSplit++; + ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. while (!OrigBB->succ_empty()) { @@ -945,7 +948,7 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { if (--CPE->RefCount == 0) { RemoveDeadCPEMI(CPEMI); CPE->CPEMI = NULL; - NumCPEs--; + --NumCPEs; return true; } return false; @@ -1246,7 +1249,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); - NumCPEs++; + ++NumCPEs; BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; // Compensate for .align 2 in thumb mode. @@ -1369,7 +1372,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { BBSizes[MBB->getNumber()] += 2; AdjustBBOffsetsAfter(MBB, 2); HasFarJump = true; - NumUBrFixed++; + ++NumUBrFixed; DEBUG(errs() << " Changed B to long jump " << *MI); @@ -1402,7 +1405,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { MachineInstr *BMI = &MBB->back(); bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - NumCBrFixed++; + ++NumCBrFixed; if (BMI != MI) { if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && BMI->getOpcode() == Br.UncondBr) { @@ -1621,7 +1624,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { // constantpool tables? MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); if (MJTI == 0) return false; - + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1658,15 +1661,25 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { continue; unsigned IdxReg = MI->getOperand(1).getReg(); bool IdxRegKill = MI->getOperand(1).isKill(); + + // Scan backwards to find the instruction that defines the base + // register. Due to post-RA scheduling, we can't count on it + // immediately preceding the branch instruction. 
MachineBasicBlock::iterator PrevI = MI; - if (PrevI == MBB->begin()) + MachineBasicBlock::iterator B = MBB->begin(); + while (PrevI != B && !PrevI->definesRegister(BaseReg)) + --PrevI; + + // If for some reason we didn't find it, we can't do anything, so + // just skip this one. + if (!PrevI->definesRegister(BaseReg)) continue; - MachineInstr *AddrMI = --PrevI; + MachineInstr *AddrMI = PrevI; bool OptOk = true; - // Examine the instruction that calculate the jumptable entry address. - // If it's not the one just before the t2BR_JT, we won't delete it, then - // it's not worth doing the optimization. + // Examine the instruction that calculates the jumptable entry address. + // Make sure it only defines the base register and kills any uses + // other than the index register. for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { const MachineOperand &MO = AddrMI->getOperand(k); if (!MO.isReg() || !MO.getReg()) @@ -1683,9 +1696,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { if (!OptOk) continue; - // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want + // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction + // that gave us the initial base register definition. + for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI) + ; + + // The instruction should be a tLEApcrel or t2LEApcrelJT; we want // to delete it as well. - MachineInstr *LeaMI = --PrevI; + MachineInstr *LeaMI = PrevI; if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && LeaMI->getOpcode() != ARM::t2LEApcrelJT) || LeaMI->getOperand(0).getReg() != BaseReg) @@ -1729,7 +1747,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); if (MJTI == 0) return false; - + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1769,7 +1787,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { MachineFunction &MF = *BB->getParent(); - // If it's the destination block is terminated by an unconditional branch, + // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple // heuristic. FIXME: We can definitely improve it. 
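The jump-table change above deserves a closer look: the old code assumed the address-forming instruction sat immediately before the t2BR_JT, which post-RA scheduling can invalidate, so it now walks backwards to the defining instruction. A minimal standalone sketch of that idiom (findDefBackwards is a hypothetical helper for illustration, not an LLVM API):

    // Walk from It toward the start of MBB and return the nearest
    // instruction defining Reg, or null if there is none. The loop also
    // stops at begin(), so the candidate must be re-tested afterwards --
    // the same reason the pass re-checks definesRegister() after its scan.
    static MachineInstr *findDefBackwards(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator It,
                                          unsigned Reg) {
      MachineBasicBlock::iterator B = MBB.begin();
      while (It != B && !It->definesRegister(Reg))
        --It;
      return It->definesRegister(Reg) ? &*It : 0;
    }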
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 6f4eddf7361d..3119b54563de 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -15,6 +15,7 @@ #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H #include "llvm/CodeGen/MachineConstantPool.h" +#include <cstddef> namespace llvm { diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c87f5d7c16a5..9c62597b4323 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -144,13 +144,15 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstrBuilder Even = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(EvenSrc, getKillRegState(SrcIsKill))); + .addReg(EvenDst, + getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill))); MachineInstrBuilder Odd = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(OddSrc, getKillRegState(SrcIsKill))); + .addReg(OddDst, + getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); Modified = true; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9baef6bf1243..c84d3ff81324 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMAddressingModes.h" #include "ARMTargetMachine.h" @@ -35,11 +36,6 @@ using namespace llvm; -static cl::opt<bool> -UseRegSeq("neon-reg-sequence", cl::Hidden, - cl::desc("Use reg_sequence to model ld / st of multiple neon regs"), - cl::init(true)); - //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -147,6 +143,11 @@ private: unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); + /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, + /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be + /// generated to force the table registers to be consecutive. + SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); + /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); @@ -173,24 +174,17 @@ private: char ConstraintCode, std::vector<SDValue> &OutOps); - /// PairDRegs - Form a quad register from a pair of D registers. - /// + // Form pairs of consecutive S, D, or Q registers. + SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1); SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); - - /// PairDRegs - Form a quad register pair from a pair of Q registers. - /// SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1); - /// QuadDRegs - Form a quad register pair from a quad of D registers. - /// + // Form sequences of 4 consecutive S, D, or Q registers. + SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - - /// QuadQRegs - Form 4 consecutive Q registers. 
- /// SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - /// OctoDRegs - Form 8 consecutive D registers. - /// + // Form sequences of 8 consecutive D registers. SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3, SDValue V4, SDValue V5, SDValue V6, SDValue V7); }; @@ -544,10 +538,9 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N, bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset){ // FIXME dl should come from the parent load or store, not the address - DebugLoc dl = Op->getDebugLoc(); if (N.getOpcode() != ISD::ADD) { ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); - if (!NC || NC->getZExtValue() != 0) + if (!NC || !NC->isNullValue()) return false; Base = Offset = N; @@ -788,8 +781,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, if (N.getOpcode() == ISD::ADD) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); + // 8 bits. if (((RHSC & 0x3) == 0) && - ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { // 8 bits. + ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { Base = N.getOperand(0); OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -798,7 +792,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, } else if (N.getOpcode() == ISD::SUB) { if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); - if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits. + // 8 bits. + if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { Base = N.getOperand(0); OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32); return true; @@ -960,22 +955,24 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } +/// PairSRegs - Form a D register from a pair of S registers. +/// +SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + /// PairDRegs - Form a quad register from a pair of D registers. /// SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); - if (llvm::ModelWithRegSequence()) { - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); - } - SDValue Undef = - SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0); - SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, - VT, Undef, V0, SubReg0); - return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, - VT, SDValue(Pair, 0), V1, SubReg1); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); } /// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. @@ -988,6 +985,19 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); } +/// QuadSRegs - Form 4 consecutive S registers. 
+/// +SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + /// QuadDRegs - Form 4 consecutive D registers. /// SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, @@ -1088,7 +1098,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - if (!llvm::ModelWithRegSequence() || NumVecs < 2) + if (NumVecs < 2) return VLd; SDValue RegSeq; @@ -1129,24 +1139,17 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, Chain = SDValue(VLd, 2 * NumVecs); // Combine the even and odd subregs to produce the result. - if (llvm::ModelWithRegSequence()) { - if (NumVecs == 1) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); - ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); - } else { - SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, - SDValue(VLd, 0), SDValue(VLd, 1), - SDValue(VLd, 2), SDValue(VLd, 3)), 0); - SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); - SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); - ReplaceUses(SDValue(N, 0), Q0); - ReplaceUses(SDValue(N, 1), Q1); - } + if (NumVecs == 1) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); + ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); - } + SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, + SDValue(VLd, 0), SDValue(VLd, 1), + SDValue(VLd, 2), SDValue(VLd, 3)), 0); + SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); + SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); + ReplaceUses(SDValue(N, 0), Q0); + ReplaceUses(SDValue(N, 1), Q1); } } else { // Otherwise, quad registers are loaded with two separate instructions, @@ -1169,37 +1172,27 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); - if (llvm::ModelWithRegSequence()) { - SDValue V0 = SDValue(VLdA, 0); - SDValue V1 = SDValue(VLdB, 0); - SDValue V2 = SDValue(VLdA, 1); - SDValue V3 = SDValue(VLdB, 1); - SDValue V4 = SDValue(VLdA, 2); - SDValue V5 = SDValue(VLdB, 2); - SDValue V6 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), - 0) - : SDValue(VLdA, 3); - SDValue V7 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), - 0) - : SDValue(VLdB, 3); - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, - V4, V5, V6, V7), 0); - - // Extract out the 3 / 4 Q registers. 
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, - dl, VT, RegSeq); - ReplaceUses(SDValue(N, Vec), Q); - } - } else { - // Combine the even and odd subregs to produce the result. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); - } + SDValue V0 = SDValue(VLdA, 0); + SDValue V1 = SDValue(VLdB, 0); + SDValue V2 = SDValue(VLdA, 1); + SDValue V3 = SDValue(VLdB, 1); + SDValue V4 = SDValue(VLdA, 2); + SDValue V5 = SDValue(VLdB, 2); + SDValue V6 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) + : SDValue(VLdA, 3); + SDValue V7 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) + : SDValue(VLdB, 3); + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, + V4, V5, V6, V7), 0); + + // Extract out the 3 / 4 Q registers. + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), Q); } } ReplaceUses(SDValue(N, NumVecs), Chain); @@ -1209,7 +1202,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { - assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range"); + assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; @@ -1247,7 +1240,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Align); if (is64BitVector) { - if (llvm::ModelWithRegSequence() && NumVecs >= 2) { + if (NumVecs >= 2) { SDValue RegSeq; SDValue V0 = N->getOperand(0+3); SDValue V1 = N->getOperand(1+3); @@ -1292,7 +1285,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VST1 and VST2, // storing pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - if (llvm::ModelWithRegSequence() && NumVecs == 2) { + if (NumVecs == 2) { // First extract the pair of Q registers. SDValue Q0 = N->getOperand(3); SDValue Q1 = N->getOperand(4); @@ -1330,76 +1323,48 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. - if (llvm::ModelWithRegSequence()) { - // Form the QQQQ REG_SEQUENCE. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3)); - V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); - - // Store the even D registers. 
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - Ops.push_back(Reg0); // post-access address offset - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, - RegVT, RegSeq)); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - // Store the odd D registers. - Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, - RegVT, RegSeq); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; - } else { - Ops.push_back(Reg0); // post-access address offset - - // Store the even subregs. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - - // Store the odd subregs. - Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; - } + // Form the QQQQ REG_SEQUENCE. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3)); + V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3)); + } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); + + // Store the even D registers. + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + Ops.push_back(Reg0); // post-access address offset + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, + RegVT, RegSeq)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd D registers. 
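The two instructions are serialized through the address operand: the first (VStA) stores the even-numbered D subregs (dsub_0, dsub_2, ...) and produces the updated address as its first result, which is written into Ops[0] below so that the second instruction can store the odd subregs (dsub_1, dsub_3, ...) where the first one left off.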
+ Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, + RegVT, RegSeq); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, @@ -1421,13 +1386,11 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, // Quad registers are handled by load/store of subregs. Find the subreg info. unsigned NumElts = 0; - int SubregIdx = 0; bool Even = false; EVT RegVT = VT; if (!is64BitVector) { RegVT = GetNEONSubregVT(VT); NumElts = RegVT.getVectorNumElements(); - SubregIdx = (Lane < NumElts) ? ARM::dsub_0 : ARM::dsub_1; Even = Lane < NumElts; } @@ -1455,35 +1418,26 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned Opc = 0; if (is64BitVector) { Opc = DOpcodes[OpcodeIndex]; - if (llvm::ModelWithRegSequence()) { - SDValue RegSeq; - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - if (NumVecs == 2) { - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - } else { - SDValue V2 = N->getOperand(2+3); - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : N->getOperand(3+3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } - - // Now extract the D registers back out. - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, - RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, - RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, - RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, - RegSeq)); + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq)); } else { // Check if this is loading the even or odd subreg of a Q register. if (Lane < NumElts) { @@ -1493,31 +1447,24 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, Opc = QOpcodes1[OpcodeIndex]; } - if (llvm::ModelWithRegSequence()) { - SDValue RegSeq; - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - if (NumVecs == 2) { - RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); - } else { - SDValue V2 = N->getOperand(2+3); - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : N->getOperand(3+3); - RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); - } - - // Extract the subregs of the input vector. - unsigned SubIdx = Even ? 
ARM::dsub_0 : ARM::dsub_1; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, - RegSeq)); + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); } else { - // Extract the subregs of the input vector. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, - N->getOperand(Vec+3))); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); } + + // Extract the subregs of the input vector. + unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, + RegSeq)); } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); @@ -1531,76 +1478,97 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, ResTys.push_back(MVT::Other); SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6); - if (llvm::ModelWithRegSequence()) { - // Form a REG_SEQUENCE to force register allocation. - SDValue RegSeq; - if (is64BitVector) { - SDValue V0 = SDValue(VLdLn, 0); - SDValue V1 = SDValue(VLdLn, 1); - if (NumVecs == 2) { - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - } else { - SDValue V2 = SDValue(VLdLn, 2); - // If it's a vld3, form a quad D-register but discard the last part. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : SDValue(VLdLn, 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + if (is64BitVector) { + SDValue V0 = SDValue(VLdLn, 0); + SDValue V1 = SDValue(VLdLn, 1); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); } else { - // For 128-bit vectors, take the 64-bit results of the load and insert them - // as subregs into the result. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - if (Even) { - V[i] = SDValue(VLdLn, Vec); - V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - } else { - V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - V[i+1] = SDValue(VLdLn, Vec); - } + SDValue V2 = SDValue(VLdLn, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLdLn, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + } else { + // For 128-bit vectors, take the 64-bit results of the load and insert + // them as subregs into the result. 
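A lane load only produces one D-sized half of each Q register (the half that contains the lane), so the interleave below pairs every loaded D value with an IMPLICIT_DEF for the other half: when the lane is in the low half (Even), the loaded values become the even-numbered entries of the sequence, otherwise the odd ones. Using IMPLICIT_DEF marks the untouched halves as unconstrained instead of creating a false dependence on some earlier value.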
+ SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + if (Even) { + V[i] = SDValue(VLdLn, Vec); + V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + } else { + V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + V[i+1] = SDValue(VLdLn, Vec); } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - if (NumVecs == 2) - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); - else - RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); - return NULL; - } - - // For a 64-bit vector load to D registers, nothing more needs to be done. - if (is64BitVector) - return VLdLn; - - // For 128-bit vectors, take the 64-bit results of the load and insert them - // as subregs into the result. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT, - N->getOperand(Vec+3), - SDValue(VLdLn, Vec)); - ReplaceUses(SDValue(N, Vec), QuadVec); + if (NumVecs == 2) + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); + else + RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); } - Chain = SDValue(VLdLn, NumVecs); - ReplaceUses(SDValue(N, NumVecs), Chain); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); return NULL; } +SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, + unsigned Opc) { + assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + unsigned FirstTblReg = IsExt ? 2 : 1; + + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + SDValue V0 = N->getOperand(FirstTblReg + 0); + SDValue V1 = N->getOperand(FirstTblReg + 1); + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + else { + SDValue V2 = N->getOperand(FirstTblReg + 2); + // If it's a vtbl3, form a quad D-register and leave the last part as + // an undef. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(FirstTblReg + 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. 
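This sequence-then-extract pattern is the point of custom-selecting VTBL: the instruction consumes a list of consecutive D registers, and independent D values give the allocator no reason to place them adjacently. Tying them into one super-register and then handing the instruction the extracted pieces makes adjacency a structural property of the allocation. In miniature, with the same calls used above (shapes assumed for illustration):

    // Force V0 and V1 into adjacent D registers, then use the pieces.
    SDValue Pair = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
    SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, Pair);
    SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, Pair);

The IsExt flag distinguishes VTBX from VTBL: for out-of-range indices VTBL writes zero while VTBX preserves the destination byte, which is why the extended forms carry the previous value as an extra leading operand (FirstTblReg is 2 rather than 1).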
+ SmallVector<SDValue, 6> Ops; + if (IsExt) + Ops.push_back(N->getOperand(1)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq)); + + Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); + Ops.push_back(getAL(CurDAG)); // predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register + return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size()); +} + SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) @@ -1954,8 +1922,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { - SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 5); + SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); @@ -2015,7 +1983,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4); + return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), @@ -2029,7 +1997,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4); + return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), @@ -2211,6 +2179,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); } + case ARMISD::BUILD_VECTOR: { + EVT VecVT = N->getValueType(0); + EVT EltVT = VecVT.getVectorElementType(); + unsigned NumElts = VecVT.getVectorNumElements(); + if (EltVT.getSimpleVT() == MVT::f64) { + assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); + return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1)); + } + assert(EltVT.getSimpleVT() == MVT::f32 && + "unexpected type for BUILD_VECTOR"); + if (NumElts == 2) + return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1)); + assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); + return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { @@ -2342,6 +2326,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + break; + + case Intrinsic::arm_neon_vtbl2: + return SelectVTBL(N, false, 2, ARM::VTBL2); + 
case Intrinsic::arm_neon_vtbl3: + return SelectVTBL(N, false, 3, ARM::VTBL3); + case Intrinsic::arm_neon_vtbl4: + return SelectVTBL(N, false, 4, ARM::VTBL4); + + case Intrinsic::arm_neon_vtbx2: + return SelectVTBL(N, true, 2, ARM::VTBX2); + case Intrinsic::arm_neon_vtbx3: + return SelectVTBL(N, true, 3, ARM::VTBX3); + case Intrinsic::arm_neon_vtbx4: + return SelectVTBL(N, true, 4, ARM::VTBX4); + } + break; + } + case ISD::CONCAT_VECTORS: return SelectConcatVector(N); } @@ -2367,9 +2374,3 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new ARMDAGToDAGISel(TM, OptLevel); } - -/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model -/// operations involving sub-registers. -bool llvm::ModelWithRegSequence() { - return UseRegSeq; -} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b8126a3c5d18..98d8b8585428 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" @@ -40,6 +41,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -47,9 +49,27 @@ #include <sstream> using namespace llvm; +STATISTIC(NumTailCalls, "Number of tail calls"); + +// This option should go away when tail calls fully work. +static cl::opt<bool> +EnableARMTailCalls("arm-tail-calls", cl::Hidden, + cl::desc("Generate tail calls (TEMPORARY OPTION)."), + cl::init(true)); + static cl::opt<bool> EnableARMLongCalls("arm-long-calls", cl::Hidden, - cl::desc("Generate calls via indirect call instructions."), + cl::desc("Generate calls via indirect call instructions"), + cl::init(false)); + +static cl::opt<bool> +ARMInterworking("arm-interworking", cl::Hidden, + cl::desc("Enable / disable ARM interworking (for debugging only)"), + cl::init(true)); + +static cl::opt<bool> +EnableARMCodePlacement("arm-code-placement", cl::Hidden, + cl::desc("Enable code placement pass for ARM"), cl::init(false)); static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, @@ -94,10 +114,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, } setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - if (llvm::ModelWithRegSequence()) - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - else - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); @@ -393,13 +410,57 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - - // If the subtarget does not have extract instructions, sign_extend_inreg - // needs to be expanded. 
Extract is available in ARM mode on v6 and up, - // and on most Thumb2 implementations. - if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops()) - || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) { + // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise + // use the default expansion. + bool canHandleAtomics = + (Subtarget->hasV7Ops() || + (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())); + if (canHandleAtomics) { + // membarrier needs custom lowering; the rest are legal and handled + // normally. + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + } else { + // Set them all for expansion, which will force libcalls. + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + // Since the libcalls include locking, fold in the fences + setShouldFoldAtomicFences(true); + } + // 64-bit versions are always libcalls (for now) + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); + + // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. + if (!Subtarget->hasV6Ops()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } @@ -412,8 +473,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // We want to custom lower some of our intrinsics. 
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); - setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + if (Subtarget->isTargetDarwin()) { + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + } setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); @@ -474,28 +537,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) else setSchedulingPreference(Sched::Hybrid); - // FIXME: If-converter should use instruction latency to determine - // profitability rather than relying on fixed limits. - if (Subtarget->getCPUString() == "generic") { - // Generic (and overly aggressive) if-conversion limits. - setIfCvtBlockSizeLimit(10); - setIfCvtDupBlockSizeLimit(2); - } else if (Subtarget->hasV7Ops()) { - setIfCvtBlockSizeLimit(3); - setIfCvtDupBlockSizeLimit(1); - } else if (Subtarget->hasV6Ops()) { - setIfCvtBlockSizeLimit(2); - setIfCvtDupBlockSizeLimit(1); - } else { - setIfCvtBlockSizeLimit(3); - setIfCvtDupBlockSizeLimit(2); - } - maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type - // Do not enable CodePlacementOpt for now: it currently runs after the - // ARMConstantIslandPass and messes up branch relaxation and placement - // of constant islands. - // benefitFromCodePlacementOpt = true; + + // On ARM arguments smaller than 4 bytes are extended, so all arguments + // are at least 4 bytes aligned. + setMinStackArgumentAlignment(4); + + if (EnableARMCodePlacement) + benefitFromCodePlacementOpt = true; } const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -537,6 +586,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; + case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; @@ -581,6 +632,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; } @@ -603,15 +655,33 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { - return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1; + return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; } Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + unsigned NumVals = N->getNumValues(); + if (!NumVals) + return Sched::RegPressure; + + for (unsigned i = 0; i != NumVals; ++i) { EVT VT = N->getValueType(i); if (VT.isFloatingPoint() || VT.isVector()) return Sched::Latency; } + + if (!N->isMachineOpcode()) + return Sched::RegPressure; + + // Load are scheduled for latency even if there instruction itinerary + // is not available. 
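In short, the heuristic prefers latency scheduling for anything plausibly expensive -- floating-point and vector values, loads (with or without an itinerary), and any machine opcode whose itinerary reports a stage latency above two cycles -- and falls back to register-pressure scheduling for everything else.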
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + if (TID.mayLoad()) + return Sched::Latency; + + const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData(); + if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2) + return Sched::Latency; return Sched::RegPressure; } @@ -964,11 +1034,28 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - // ARM target does not yet support tail call optimization. - isTailCall = false; + MachineFunction &MF = DAG.getMachineFunction(); + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool IsSibCall = false; + // Temporarily disable tail calls so things don't break. + if (!EnableARMTailCalls) + isTailCall = false; + if (isTailCall) { + // Check if it's really possible to do a tail call. + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); + // We don't support GuaranteedTailCallOpt for ARM, only automatically + // detected sibcalls. + if (isTailCall) { + ++NumTailCalls; + IsSibCall = true; + } + } // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; @@ -981,9 +1068,14 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); + // For tail calls, memory operands are available in our caller's stack. + if (IsSibCall) + NumBytes = 0; + // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); @@ -996,7 +1088,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[realArgIdx].Val; + SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; // Promote the value if needed. @@ -1044,7 +1136,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, } } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { + } else if (!IsSibCall) { assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, @@ -1059,10 +1151,32 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); + // Tail call byval lowering might overwrite argument registers so in case of + // tail call optimization the copies to registers are lowered later. 
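The aliasing problem these comments describe is easiest to see at the source level: for a sibcall the callee reuses the caller's argument area, so an outgoing stack argument can land on top of an incoming one that is still live. A hypothetical example, assuming the fifth integer argument is passed on the stack per the AAPCS:

    int callee(int a, int b, int c, int d, int e);
    int caller(int a, int b, int c, int d, int e) {
      // 'e' lives in the caller's incoming stack slot; the sibcall writes
      // its own fifth argument into that same fixed slot, so the load of
      // 'e' must be ordered before the store of 'e + 1'.
      return callee(a, b, c, d, e + 1);
    }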
+ if (!isTailCall) + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // For tail calls lower the arguments to the 'real' stack slot. + if (isTailCall) { + // Force all the incoming stack arguments to be loaded from the stack + // before any new outgoing arguments are stored to the stack, because the + // outgoing stack slots may alias the incoming argument stack slots, and + // the alias isn't otherwise explicit. This is slightly more conservative + // than necessary, because it means that each store effectively depends + // on every argument instead of just those arguments it would clobber. + + // Do not flag preceeding copytoreg stuff together with the following stuff. + InFlag = SDValue(); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + InFlag =SDValue(); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every @@ -1071,7 +1185,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, bool isDirect = false; bool isARMFunc = false; bool isLocalARMFunc = false; - MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (EnableARMLongCalls) { @@ -1117,7 +1230,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // ARM call to a local ARM function is predicable. - isLocalARMFunc = !Subtarget->isThumb() && !isExt; + isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); @@ -1134,7 +1247,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); } else - Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; bool isStub = Subtarget->isTargetDarwin() && @@ -1171,11 +1284,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) : ARMISD::CALL_NOLINK; } - if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) { - // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK - Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag); - InFlag = Chain.getValue(1); - } std::vector<SDValue> Ops; Ops.push_back(Chain); @@ -1189,9 +1297,13 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (InFlag.getNode()) Ops.push_back(InFlag); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + if (isTailCall) + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + // Returns a chain and a flag for retval copy to use. 
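Note that the sibcall path exits just above with an ARMISD::TC_RETURN node instead: no CALLSEQ_END and no return-value copies are emitted, on the expectation that the node is later matched to a branch that reuses the caller's return. (That is the usual TC_RETURN contract; the matching pseudo-instruction handling is outside this hunk.)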
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), - &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -1205,10 +1317,203 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, dl, DAG, InVals); } +/// MatchingStackOffset - Return true if the given stack call argument is +/// already available in the same position (relatively) of the caller's +/// incoming argument stack. +static +bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, + MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, + const ARMInstrInfo *TII) { + unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + int FI = INT_MAX; + if (Arg.getOpcode() == ISD::CopyFromReg) { + unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); + if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) + return false; + MachineInstr *Def = MRI->getVRegDef(VR); + if (!Def) + return false; + if (!Flags.isByVal()) { + if (!TII->isLoadFromStackSlot(Def, FI)) + return false; + } else { + return false; + } + } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { + if (Flags.isByVal()) + // ByVal argument is passed in as a pointer but it's now being + // dereferenced. e.g. + // define @foo(%struct.X* %A) { + // tail call @bar(%struct.X* byval %A) + // } + return false; + SDValue Ptr = Ld->getBasePtr(); + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); + if (!FINode) + return false; + FI = FINode->getIndex(); + } else + return false; + + assert(FI != INT_MAX); + if (!MFI->isFixedObjectIndex(FI)) + return false; + return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); +} + +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. Targets which want to do tail call +/// optimization should implement this function. +bool +ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + const Function *CallerF = DAG.getMachineFunction().getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. This is what gcc calls sibcall. + + // Do not sibcall optimize vararg calls unless the call site is not passing + // any arguments. + if (isVarArg && !Outs.empty()) + return false; + + // Also avoid sibcall optimization if either caller or callee uses struct + // return semantics. + if (isCalleeStructRet || isCallerStructRet) + return false; + + // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: + // emitEpilogue is not ready for them. + // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take + // LR. This means if we need to reload LR, it takes an extra instructions, + // which outweighs the value of the tail call; but here we don't know yet + // whether LR is going to be used. Probably the right approach is to + // generate the tail call here and turn it back into CALL/RET in + // emitEpilogue if LR is used. 
+ if (Subtarget->isThumb1Only()) + return false; + + // For the moment, we can only do this to functions defined in this + // compilation, or to indirect calls. A Thumb B to an ARM function, + // or vice versa, is not easily fixed up in the linker unlike BL. + // (We could do this by loading the address of the callee into a register; + // that is an extra instruction over the direct call and burns a register + // as well, so is not likely to be a win.) + + // It might be safe to remove this restriction on non-Darwin. + + // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, + // but we need to make sure there are enough registers; the only valid + // registers are the 4 used for parameters. We don't currently do this + // case. + if (isa<ExternalSymbolSDNode>(Callee)) + return false; + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + if (GV->isDeclaration() || GV->isWeakForLinker()) + return false; + } + + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. + if (!CCMatch) { + SmallVector<CCValAssign, 16> RVLocs1; + CCState CCInfo1(CalleeCC, false, getTargetMachine(), + RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); + + SmallVector<CCValAssign, 16> RVLocs2; + CCState CCInfo2(CallerCC, false, getTargetMachine(), + RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + + // If the callee takes no arguments then go on to check the results of the + // call. + if (!Outs.empty()) { + // Check if stack adjustment is needed. For now, do not do this if any + // argument is passed on the stack. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, + CCAssignFnForNode(CalleeCC, false, isVarArg)); + if (CCInfo.getNextStackOffset()) { + MachineFunction &MF = DAG.getMachineFunction(); + + // Check if the arguments are already laid out in the right way as + // the caller's fixed stack objects. + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const ARMInstrInfo *TII = + ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); + for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); + i != e; + ++i, ++realArgIdx) { + CCValAssign &VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + SDValue Arg = OutVals[realArgIdx]; + ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (VA.needsCustom()) { + // f64 and vector types are split into multiple registers or + // register/stack-slot combinations. The types will not match + // the registers; give up on memory f64 refs until we figure + // out what to do about this. 
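The index arithmetic in the checks that follow mirrors how custom-handled values are split by the calling convention: an f64 occupies two consecutive CCValAssign entries and a v2f64 four, so the loop advances i past the partner locations while insisting that every one of them landed in a register.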
+ if (!VA.isRegLoc()) + return false; + if (!ArgLocs[++i].isRegLoc()) + return false; + if (RegVT == MVT::v2f64) { + if (!ArgLocs[++i].isRegLoc()) + return false; + if (!ArgLocs[++i].isRegLoc()) + return false; + } + } else if (!VA.isRegLoc()) { + if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, + MFI, MRI, TII)) + return false; + } + } + } + + return true; +} + SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. @@ -1239,7 +1544,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Arg = Outs[realRVLocIdx].Val; + SDValue Arg = OutVals[realRVLocIdx]; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); @@ -1477,7 +1782,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, // pair. This is always cheaper. if (Subtarget->useMovt()) { return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, PtrVT)); + DAG.getTargetGlobalAddress(GV, dl, PtrVT)); } else { SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -1552,9 +1857,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - SDValue Val = Subtarget->isThumb() ? - DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : - DAG.getConstant(0, MVT::i32); + SDValue Val = DAG.getConstant(0, MVT::i32); return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1), Val); } @@ -1568,8 +1871,7 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) - const { + const ARMSubtarget *Subtarget) const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { @@ -1597,7 +1899,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, PseudoSourceValue::getConstantPool(), 0, false, false, 0); - SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1609,25 +1910,21 @@ } static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { + const ARMSubtarget *Subtarget) { DebugLoc dl = Op.getDebugLoc(); SDValue Op5 = Op.getOperand(5); - SDValue Res; unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); - if (isDeviceBarrier) { - if (Subtarget->hasV7Ops()) - Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0)); - else - Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(0, MVT::i32)); - } else { - if (Subtarget->hasV7Ops()) - Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); - else - Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(0, MVT::i32)); - } - return Res; + // v6 and v7 can both handle barriers directly, but need to be handled a bit
+ // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should + // never get here. + unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; + if (Subtarget->hasV7Ops()) + return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); + else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()) + return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); + return SDValue(); } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { @@ -1712,7 +2009,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue ArgValue2; if (NextVA.isMemLoc()) { MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false); + int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1768,8 +2065,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, VA = ArgLocs[++i]; // skip ahead to next loc SDValue ArgValue2; if (VA.isMemLoc()) { - int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0, @@ -1836,8 +2132,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1868,7 +2163,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, AFI->setVarArgsFrameIndex( MFI->CreateFixedObject(VARegSaveSize, ArgOffset + VARegSaveSize - VARegSize, - true, false)); + true)); SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy()); @@ -1884,8 +2179,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0, - false, false, 0); + PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), + 0, false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1895,8 +2190,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. 
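Stepping back to the memory-barrier rewrite at the top of this hunk: the new code is a three-way choice on subtarget features. A minimal standalone mirror of that dispatch (a sketch under my reading of the code; the mapping to a dmb instruction on v7 and a CP15 write on v6 is an assumption drawn from the ARM manuals, not spelled out in this patch):

enum BarrierLowering { V7Barrier, V6Barrier, Libcall };

// v7 has dedicated barrier instructions (e.g. dmb); v6 ARM mode reaches the
// same effect through a CP15 operation, which is what the extra zero operand
// feeds; everything else should already have been legalized to a libcall.
static BarrierLowering selectBarrier(bool hasV7, bool hasV6, bool isThumb1) {
  if (hasV7) return V7Barrier;
  if (hasV6 && !isThumb1) return V6Barrier;
  return Libcall;
}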
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, - true, false)); + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); } return Chain; } @@ -1978,9 +2272,44 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); } +static bool canBitcastToInt(SDNode *Op) { + return Op->hasOneUse() && + ISD::isNormalLoad(Op) && + Op->getValueType(0) == MVT::f32; +} + +static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) { + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) + return DAG.getLoad(MVT::i32, Op.getDebugLoc(), + Ld->getChain(), Ld->getBasePtr(), + Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); + + llvm_unreachable("Unknown VFP cmp argument!"); +} + /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. -static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) { +SDValue +ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, + DebugLoc dl) const { + if (UnsafeFPMath && FiniteOnlyFPMath() && + (CC == ISD::SETEQ || CC == ISD::SETOEQ || + CC == ISD::SETNE || CC == ISD::SETUNE) && + canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) { + // If unsafe fp math optimization is enabled and there are no other uses of + // the CMP operands, and the condition code is EQ or NE, we can optimize it + // to an integer comparison. + if (CC == ISD::SETOEQ) + CC = ISD::SETEQ; + else if (CC == ISD::SETUNE) + CC = ISD::SETNE; + LHS = bitcastToInt(LHS, DAG); + RHS = bitcastToInt(RHS, DAG); + return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); + } + SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); @@ -2010,13 +2339,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMCC, CCR, Cmp); + ARMCC, CCR, Cmp); if (CondCode2 != ARMCC::AL) { SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl); Result = DAG.getNode(ARMISD::CMOV, dl, VT, Result, TrueVal, ARMCC2, CCR, Cmp2); } @@ -2043,8 +2372,8 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; @@ -2132,7 +2461,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(Opc, dl, VT, Op); } -static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); @@ -2140,8 +2469,10 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); - SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); + SDValue Cmp = getVFPCmp(Tmp1, FP0, + ISD::SETLT, ARMCC, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } @@ -2206,7 +2537,8 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); + return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT, + DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); } // Turn f64->i64 into VMOVRRD. @@ -2516,76 +2848,149 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { return Result; } -/// isVMOVSplat - Check if the specified splat value corresponds to an immediate -/// VMOV instruction, and if so, return the constant being splatted. -static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef, - unsigned SplatBitSize, SelectionDAG &DAG) { +/// isNEONModifiedImm - Check if the specified splat value corresponds to a +/// valid vector constant for a NEON instruction with a "modified immediate" +/// operand (e.g., VMOV). If so, return either the constant being +/// splatted or the encoded value, depending on the DoEncode parameter. +static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, + unsigned SplatBitSize, SelectionDAG &DAG, + bool isVMOV, bool DoEncode) { + unsigned OpCmode, Imm; + EVT VT; + + // SplatBitSize is set to the smallest size that splats the vector, so a + // zero vector will always have SplatBitSize == 8. However, NEON modified + // immediate instructions other than VMOV do not support the 8-bit encoding + // of a zero vector, and the default encoding of zero is supposed to be the + // 32-bit version. + if (SplatBits == 0) + SplatBitSize = 32; + switch (SplatBitSize) { case 8: - // Any 1-byte value is OK. + // Any 1-byte value is OK. Op=0, Cmode=1110. assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); - return DAG.getTargetConstant(SplatBits, MVT::i8); + OpCmode = 0xe; + Imm = SplatBits; + VT = MVT::i8; + break; case 16: // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. - if ((SplatBits & ~0xff) == 0 || - (SplatBits & ~0xff00) == 0) - return DAG.getTargetConstant(SplatBits, MVT::i16); - break; + VT = MVT::i16; + if ((SplatBits & ~0xff) == 0) { + // Value = 0x00nn: Op=x, Cmode=100x. + OpCmode = 0x8; + Imm = SplatBits; + break; + } + if ((SplatBits & ~0xff00) == 0) { + // Value = 0xnn00: Op=x, Cmode=101x. + OpCmode = 0xa; + Imm = SplatBits >> 8; + break; + } + return SDValue(); case 32: // NEON's 32-bit VMOV supports splat values where: // * only one byte is nonzero, or // * the least significant byte is 0xff and the second byte is nonzero, or // * the least significant 2 bytes are 0xff and the third is nonzero.
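The 16-bit cases above and the first three 32-bit bullets share one rule: exactly one nonzero byte. A compact standalone restatement for the 32-bit case (an editor's sketch; the OpCmode values follow the comments in the code that continues below):

#include <cstdint>

// Returns true and fills OpCmode/Imm when a 32-bit splat value has exactly
// one nonzero byte: Cmode 000x/001x/010x/011x for bytes 0 through 3.
static bool encodeOneByteSplat32(uint32_t SplatBits,
                                 unsigned &OpCmode, unsigned &Imm) {
  for (unsigned ByteNum = 0; ByteNum != 4; ++ByteNum) {
    uint32_t Mask = 0xffu << (8 * ByteNum);
    if ((SplatBits & ~Mask) == 0) {
      OpCmode = 2 * ByteNum;
      Imm = (SplatBits >> (8 * ByteNum)) & 0xff;
      return true;
    }
  }
  return false;
}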
- if ((SplatBits & ~0xff) == 0 || - (SplatBits & ~0xff00) == 0 || - (SplatBits & ~0xff0000) == 0 || - (SplatBits & ~0xff000000) == 0) - return DAG.getTargetConstant(SplatBits, MVT::i32); + VT = MVT::i32; + if ((SplatBits & ~0xff) == 0) { + // Value = 0x000000nn: Op=x, Cmode=000x. + OpCmode = 0; + Imm = SplatBits; + break; + } + if ((SplatBits & ~0xff00) == 0) { + // Value = 0x0000nn00: Op=x, Cmode=001x. + OpCmode = 0x2; + Imm = SplatBits >> 8; + break; + } + if ((SplatBits & ~0xff0000) == 0) { + // Value = 0x00nn0000: Op=x, Cmode=010x. + OpCmode = 0x4; + Imm = SplatBits >> 16; + break; + } + if ((SplatBits & ~0xff000000) == 0) { + // Value = 0xnn000000: Op=x, Cmode=011x. + OpCmode = 0x6; + Imm = SplatBits >> 24; + break; + } if ((SplatBits & ~0xffff) == 0 && - ((SplatBits | SplatUndef) & 0xff) == 0xff) - return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32); + ((SplatBits | SplatUndef) & 0xff) == 0xff) { + // Value = 0x0000nnff: Op=x, Cmode=1100. + OpCmode = 0xc; + Imm = SplatBits >> 8; + SplatBits |= 0xff; + break; + } if ((SplatBits & ~0xffffff) == 0 && - ((SplatBits | SplatUndef) & 0xffff) == 0xffff) - return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32); + ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { + // Value = 0x00nnffff: Op=x, Cmode=1101. + OpCmode = 0xd; + Imm = SplatBits >> 16; + SplatBits |= 0xffff; + break; + } // Note: there are a few 32-bit splat values (specifically: 00ffff00, // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not // VMOV.I32. A (very) minor optimization would be to replicate the value // and fall through here to test for a valid 64-bit splat. But, then the // caller would also need to check and handle the change in size. - break; + return SDValue(); case 64: { // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. + if (!isVMOV) + return SDValue(); uint64_t BitMask = 0xff; uint64_t Val = 0; + unsigned ImmMask = 1; + Imm = 0; for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { - if (((SplatBits | SplatUndef) & BitMask) == BitMask) + if (((SplatBits | SplatUndef) & BitMask) == BitMask) { Val |= BitMask; - else if ((SplatBits & BitMask) != 0) + Imm |= ImmMask; + } else if ((SplatBits & BitMask) != 0) { return SDValue(); + } BitMask <<= 8; + ImmMask <<= 1; } - return DAG.getTargetConstant(Val, MVT::i64); + // Op=1, Cmode=1110. + OpCmode = 0x1e; + SplatBits = Val; + VT = MVT::i64; + break; } default: - llvm_unreachable("unexpected size for isVMOVSplat"); - break; + llvm_unreachable("unexpected size for isNEONModifiedImm"); + return SDValue(); } - return SDValue(); + if (DoEncode) { + unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); + return DAG.getTargetConstant(EncodedVal, MVT::i32); + } + return DAG.getTargetConstant(SplatBits, VT); } -/// getVMOVImm - If this is a build_vector of constants which can be -/// formed by using a VMOV instruction of the specified element size, -/// return the constant being splatted. The ByteSize field indicates the -/// number of bytes of each element [1248]. -SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { +/// getNEONModImm - If this is a valid vector constant for a NEON instruction +/// with a "modified immediate" operand (e.g., VMOV) of the specified element +/// size, return the encoded value for that immediate. The ByteSize field +/// indicates the number of bytes of each element [1248]. 
+SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, + SelectionDAG &DAG) { BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); APInt SplatBits, SplatUndef; unsigned SplatBitSize; @@ -2597,8 +3002,8 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { if (SplatBitSize > ByteSize * 8) return SDValue(); - return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), - SplatBitSize, DAG); + return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), + SplatBitSize, DAG, isVMOV, true); } static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, @@ -2838,8 +3243,10 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { - SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, DAG); + // Check if an immediate VMOV works. + SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), + SplatBitSize, DAG, true, false); if (Val.getNode()) return BuildSplat(Val, VT, DAG, dl); } @@ -2883,21 +3290,17 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::VDUP, dl, VT, Value); // Vectors with 32- or 64-bit elements can be built by directly assigning - // the subregisters. + // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands + // will be legalized. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); - SDValue Val = DAG.getUNDEF(VecVT); - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Elt = Op.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) - continue; - Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt); - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt, - DAG.getConstant(i, MVT::i32)); - } + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i))); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); } @@ -2934,7 +3337,9 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, bool ReverseVEXT; unsigned Imm, WhichResult; - return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + return (EltSize >= 32 || + ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || @@ -3032,59 +3437,62 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // of the same type so that they get CSEd properly. SVN->getMask(ShuffleMask); - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { - int Lane = SVN->getSplatIndex(); - // If this is undef splat, generate it via "just" vdup, if possible. - if (Lane == -1) Lane = 0; - - if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { - return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize <= 32) { + if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { + int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible.
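The splat branch that begins here hinges on the mask test. An approximate standalone mirror of what isSplatMask checks (illustrative, simplified from the target-independent helper):

// True iff every defined mask entry names the same source lane; Lane stays
// -1 for an all-undef mask, which the code below turns into a lane-0 VDUP.
static bool isSplatShuffleMask(const int *Mask, unsigned NumElts, int &Lane) {
  Lane = -1;
  for (unsigned i = 0; i != NumElts; ++i) {
    if (Mask[i] < 0) continue; // undef element
    if (Lane < 0) Lane = Mask[i];
    else if (Mask[i] != Lane) return false;
  }
  return true;
}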
+ if (Lane == -1) Lane = 0; + + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + } + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, + DAG.getConstant(Lane, MVT::i32)); } - return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, - DAG.getConstant(Lane, MVT::i32)); - } - bool ReverseVEXT; - unsigned Imm; - if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { - if (ReverseVEXT) - std::swap(V1, V2); - return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, - DAG.getConstant(Imm, MVT::i32)); - } - - if (isVREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(ARMISD::VREV64, dl, VT, V1); - if (isVREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(ARMISD::VREV32, dl, VT, V1); - if (isVREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(ARMISD::VREV16, dl, VT, V1); - - // Check for Neon shuffles that modify both input vectors in place. - // If both results are used, i.e., if there are two shuffles with the same - // source operands and with masks corresponding to both results of one of - // these operations, DAG memoization will ensure that a single node is - // used for both shuffles. - unsigned WhichResult; - if (isVTRNMask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), - V1, V2).getValue(WhichResult); - if (isVUZPMask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), - V1, V2).getValue(WhichResult); - if (isVZIPMask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), - V1, V2).getValue(WhichResult); + bool ReverseVEXT; + unsigned Imm; + if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { + if (ReverseVEXT) + std::swap(V1, V2); + return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, + DAG.getConstant(Imm, MVT::i32)); + } - if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), - V1, V1).getValue(WhichResult); - if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), - V1, V1).getValue(WhichResult); - if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), - V1, V1).getValue(WhichResult); + if (isVREVMask(ShuffleMask, VT, 64)) + return DAG.getNode(ARMISD::VREV64, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 32)) + return DAG.getNode(ARMISD::VREV32, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 16)) + return DAG.getNode(ARMISD::VREV16, dl, VT, V1); + + // Check for Neon shuffles that modify both input vectors in place. + // If both results are used, i.e., if there are two shuffles with the same + // source operands and with masks corresponding to both results of one of + // these operations, DAG memoization will ensure that a single node is + // used for both shuffles. 
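To make the two-result scheme just described concrete: in the generic shuffle numbering, where the second operand's elements start at NumElts, each in-place operation corresponds to a fixed pair of masks. A small generator for the VZIP pair (an editor's example, consistent with the isVZIPMask convention):

// For NumElts == 4 this yields <0,4,1,5> for result 0 and <2,6,3,7> for
// result 1; two shuffles with these masks over the same operands CSE into
// one VZIP node, selected via getValue(0) and getValue(1).
static void vzipMasks(unsigned NumElts, int *Res0, int *Res1) {
  for (unsigned i = 0; i != NumElts / 2; ++i) {
    Res0[2*i] = i;                 Res0[2*i+1] = i + NumElts;
    Res1[2*i] = i + NumElts / 2;   Res1[2*i+1] = i + NumElts / 2 + NumElts;
  }
}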
+ unsigned WhichResult; + if (isVTRNMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVUZPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVZIPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + + if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + } // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. @@ -3108,8 +3516,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } - // Implement shuffles with 32- or 64-bit elements as subreg copies. - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. @@ -3117,17 +3524,17 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); - SDValue Val = DAG.getUNDEF(VecVT); + SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) { if (ShuffleMask[i] < 0) - continue; - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - ShuffleMask[i] < (int)NumElts ? V1 : V2, - DAG.getConstant(ShuffleMask[i] & (NumElts-1), - MVT::i32)); - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, - Elt, DAG.getConstant(i, MVT::i32)); + Ops.push_back(DAG.getUNDEF(EltVT)); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + ShuffleMask[i] < (int)NumElts ? V1 : V2, + DAG.getConstant(ShuffleMask[i] & (NumElts-1), + MVT::i32))); } + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); } @@ -3277,7 +3684,12 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... @@ -3315,7 +3727,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // ... BB = exitMBB; - MF->DeleteMachineInstr(MI); // The instruction is gone now. + MI->eraseFromParent(); // The instruction is gone now. 
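For orientation, the blocks stitched together in EmitAtomicCmpSwap above implement the usual load-exclusive/store-exclusive retry loop; a sketch of the emitted shape (my summary; exact opcodes depend on the operand size and on Thumb2 versus ARM mode):

//   loop1MBB: ldrex   rDest, [rPtr]             ; load current value
//             cmp     rDest, rOldVal
//             bne     exitMBB                   ; mismatch -> give up
//   loop2MBB: strex   rScratch, rNewVal, [rPtr]
//             cmp     rScratch, #0
//             bne     loop1MBB                  ; lost reservation -> retry
//   exitMBB:  ; remainder of the original block, spliced here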
return BB; } @@ -3358,7 +3770,12 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MF->insert(It, loopMBB); MF->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = MF->getRegInfo(); unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); @@ -3403,7 +3820,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // ... BB = exitMBB; - MF->DeleteMachineInstr(MI); // The instruction is gone now. + MI->eraseFromParent(); // The instruction is gone now. return BB; } @@ -3488,22 +3905,21 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) - .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) + .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -3516,11 +3932,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII->get(ARM::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -3541,7 +3958,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) ? 
ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; - BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP) + BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP) .addReg(SrcReg, getKillRegState(SrcIsKill)); } @@ -3573,7 +3990,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, NeedPred = true; NeedCC = true; NeedOp3 = true; break; } - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP); if (OpOpc == ARM::tAND) AddDefaultT1CC(MIB); MIB.addReg(ARM::SP); @@ -3589,10 +4006,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; - BuildMI(BB, dl, TII->get(CopyOpc)) + BuildMI(*BB, MI, dl, TII->get(CopyOpc)) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(ARM::SP); - MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } } @@ -3893,7 +4310,8 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { // Narrowing shifts require an immediate right shift. if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) break; - llvm_unreachable("invalid shift count for narrowing vector shift intrinsic"); + llvm_unreachable("invalid shift count for narrowing vector shift " + "intrinsic"); default: llvm_unreachable("unhandled vector shift"); @@ -4156,14 +4574,13 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { if (!Subtarget->hasV6Ops()) // Pre-v6 does not support unaligned mem access. return false; - else { - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - // FIXME: This is pretty conservative. Should we provide cmdline option to - // control the behaviour? - if (!Subtarget->isTargetDarwin()) - return false; - } + + // v6+ may or may not support unaligned mem access depending on the system + // configuration. + // FIXME: This is pretty conservative. Should we provide cmdline option to + // control the behaviour? + if (!Subtarget->isTargetDarwin()) + return false; switch (VT.getSimpleVT().SimpleTy) { default: @@ -4619,7 +5036,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } } if (StringRef("{cc}").equals_lower(Constraint)) - return std::make_pair(0U, ARM::CCRRegisterClass); + return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } @@ -4669,7 +5086,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, /// vector. If it is invalid, don't add anything to Ops. void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Constraint, - bool hasMemory, std::vector<SDValue>&Ops, SelectionDAG &DAG) const { SDValue Result(0, 0); @@ -4818,8 +5234,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(Result); return; } - return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, - Ops, DAG); + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } bool diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 9c7517c88195..3a3866928a0e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -70,6 +70,8 @@ namespace llvm { EH_SJLJ_SETJMP, // SjLj exception handling setjmp. 
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + TC_RETURN, // Tail call return pseudo. + THREAD_POINTER, DYN_ALLOC, // Dynamic allocation on the stack. @@ -133,6 +135,13 @@ namespace llvm { VUZP, // unzip (deinterleave) VTRN, // transpose + // Operands of the standard BUILD_VECTOR node are not legalized, which + // is fine if BUILD_VECTORs are always lowered to shuffles or other + // operations, but for ARM some BUILD_VECTORs are legal as-is and their + // operands need to be legalized. Define an ARM-specific version of + // BUILD_VECTOR for this purpose. + BUILD_VECTOR, + // Floating-point max and min: FMAX, FMIN @@ -141,11 +150,12 @@ namespace llvm { /// Define some predicates that are used for node matching. namespace ARM { - /// getVMOVImm - If this is a build_vector of constants which can be - /// formed by using a VMOV instruction of the specified element size, - /// return the constant being splatted. The ByteSize field indicates the - /// number of bytes of each element [1248]. - SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + /// getNEONModImm - If this is a valid vector constant for a NEON + /// instruction with a "modified immediate" operand (e.g., VMOV) of the + /// specified element size, return the encoded value for that immediate. + /// The ByteSize field indicates the number of bytes of each element [1248]. + SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, + SelectionDAG &DAG); /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd) @@ -189,9 +199,9 @@ namespace llvm { bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is a legal - /// icmp immediate, that is the target has icmp instructions which can compare - /// a register against the immediate without having to materialize the - /// immediate into a register. + /// icmp immediate, that is, the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. virtual bool isLegalICmpImmediate(int64_t Imm) const; /// getPreIndexedAddressParts - returns true by value, base pointer and @@ -232,7 +242,6 @@ namespace llvm { /// being processed is 'm'.
virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; @@ -282,7 +291,8 @@ namespace llvm { SDValue &Root, SelectionDAG &DAG, DebugLoc dl) const; - CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; + CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, + bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, @@ -303,6 +313,7 @@ namespace llvm { SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; @@ -327,18 +338,34 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; + SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index d487df16df5e..ac568e75ccc4 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -50,27 +50,23 @@ def VFPLdStMulFrm : Format<22>; def VFPMiscFrm : Format<23>; def ThumbFrm : Format<24>; - -def NEONFrm : Format<25>; -def NEONGetLnFrm : Format<26>; -def NEONSetLnFrm : Format<27>; -def NEONDupFrm : Format<28>; - -def MiscFrm : Format<29>; -def ThumbMiscFrm : Format<30>; - -def NLdStFrm : Format<31>; -def N1RegModImmFrm : Format<32>; -def N2RegFrm : Format<33>; -def NVCVTFrm : Format<34>; -def NVDupLnFrm : Format<35>; -def N2RegVShLFrm : Format<36>; -def N2RegVShRFrm : Format<37>; -def N3RegFrm : Format<38>; -def N3RegVShFrm : Format<39>; -def NVExtFrm : Format<40>; -def NVMulSLFrm : Format<41>; -def NVTBLFrm : Format<42>; +def MiscFrm : Format<25>; + +def NGetLnFrm : Format<26>; +def NSetLnFrm : Format<27>; +def NDupFrm : Format<28>; +def NLdStFrm : Format<29>; +def N1RegModImmFrm: Format<30>; +def N2RegFrm : Format<31>; +def NVCVTFrm : Format<32>; +def NVDupLnFrm : Format<33>; +def N2RegVShLFrm : Format<34>; +def N2RegVShRFrm : Format<35>; +def N3RegFrm : 
Format<36>; +def N3RegVShFrm : Format<37>; +def NVExtFrm : Format<38>; +def NVMulSLFrm : Format<39>; +def NVTBLFrm : Format<40>; // Misc flags. @@ -1653,17 +1649,17 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin, + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin, opc, dt, asm, pattern>; class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin, + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin, opc, dt, asm, pattern>; class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin, + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin, opc, dt, asm, pattern>; // Vector Duplicate Lane (from scalar to all elements) diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 85f6b4019a8e..ba228ffac8ed 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -63,7 +63,7 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { void ARMInstrInfo:: reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo &TRI) const { DebugLoc dl = Orig->getDebugLoc(); unsigned Opcode = Orig->getOpcode(); switch (Opcode) { diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index d4199d1267fd..4563ffea7b9c 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -35,7 +35,7 @@ public: void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo &TRI) const; /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index f3156d94693b..c73e204a26b3 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -53,6 +53,8 @@ def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -117,6 +119,9 @@ def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; +def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. 
// @@ -858,13 +863,13 @@ def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, IIC_iALUi, "adr$p\t$dst, #$label", []>; +} // neverHasSideEffects def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), Pseudo, IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []> { let Inst{25} = 1; } -} // neverHasSideEffects //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -1026,6 +1031,74 @@ let isCall = 1, } } +// Tail calls. + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + // Darwin versions. + let Defs = [R0, R1, R2, R3, R9, R12, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, + D27, D28, D29, D30, D31, PC], + Uses = [SP] in { + def TCRETURNdi : AInoP<(outs), (ins i32imm:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>; + + def TCRETURNri : AInoP<(outs), (ins tcGPR:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>; + + def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]>; + + def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b.w\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]>; + + def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops), + BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0b1110; + } + } + + // Non-Darwin versions (the difference is R9). + let Defs = [R0, R1, R2, R3, R12, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, + D27, D28, D29, D30, D31, PC], + Uses = [SP] in { + def TCRETURNdiND : AInoP<(outs), (ins i32imm:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>; + + def TCRETURNriND : AInoP<(outs), (ins tcGPR:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>; + + def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b\t$dst @ TAILCALL", + []>, Requires<[IsARM, IsNotDarwin]>; + + def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b.w\t$dst @ TAILCALL", + []>, Requires<[IsThumb, IsNotDarwin]>; + + def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops), + BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", + []>, Requires<[IsNotDarwin]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0b1110; + } + } +} + let isBranch = 1, isTerminator = 1 in { // B is "predicable" since it can be xformed into a Bcc. let isBarrier = 1 in { @@ -1397,6 +1470,14 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, let Inst{25} = 0; } +// A version for the smaller set of tail call registers. 
+let neverHasSideEffects = 1 in +def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm, + IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP { + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; +} + def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { @@ -2530,31 +2611,30 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, - D31 ] in { + D31 ], hasSideEffects = 1, isBarrier = 1 in { def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" - "add\t$val, pc, #8\n\t" - "str\t$val, [$src, #+4]\n\t" - "mov\tr0, #0\n\t" - "add\tpc, pc, #0\n\t" - "mov\tr0, #1 ${:comment} eh_setjmp end", "", + "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, HasVFP2]>; } let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], + hasSideEffects = 1, isBarrier = 1 in { def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" - "add\t$val, pc, #8\n\t" - "str\t$val, [$src, #+4]\n\t" - "mov\tr0, #0\n\t" - "add\tpc, pc, #0\n\t" - "mov\tr0, #1 ${:comment} eh_setjmp end", "", + "add\t$val, pc, #8\n ${:comment} eh_setjmp begin\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } @@ -2621,6 +2701,24 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), // TODO: add,sub,and, 3-instr forms? 
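Before the tail-call patterns that follow: the tcGPR class used by MOVr_TC above, and by the TCRETURN and TAILJMPr definitions, restricts the branch target to registers that survive the epilogue. A sketch of the reasoning (my reading; the exact class contents are an assumption, not listed in this hunk):

// The indirect branch of a tail call executes after callee-saved registers
// have been reloaded, so the target must live in a caller-saved register:
//   pop {r4, r5, lr}   ; epilogue restores callee-saved state
//   bx  r12            ; TAILJMPr: r12 survives, r4/r5 would not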
+// Tail calls +def : ARMPat<(ARMtcret tcGPR:$dst), + (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), + (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), + (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret tcGPR:$dst), + (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>; + +def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; + +def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; // Direct calls def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 197ec16eedea..a84315f73038 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -98,17 +98,8 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; // NEON operand definitions //===----------------------------------------------------------------------===// -def h8imm : Operand<i8> { - let PrintMethod = "printHex8ImmOperand"; -} -def h16imm : Operand<i16> { - let PrintMethod = "printHex16ImmOperand"; -} -def h32imm : Operand<i32> { - let PrintMethod = "printHex32ImmOperand"; -} -def h64imm : Operand<i64> { - let PrintMethod = "printHex64ImmOperand"; +def nModImm : Operand<i32> { + let PrintMethod = "printNEONModImmOperand"; } //===----------------------------------------------------------------------===// @@ -812,11 +803,6 @@ def DSubReg_f64_reg : SDNodeXForm<imm, [{ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); }]>; -def DSubReg_f64_other_reg : SDNodeXForm<imm, [{ - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()), - MVT::i32); -}]>; // Extract S sub-registers of Q/D registers. def SSubReg_f32_reg : SDNodeXForm<imm, [{ @@ -2282,7 +2268,7 @@ def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, NEONvceq, 1>; // For disassembly only. defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$dst, $src, #0">; + "$dst, $src, #0">; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -2834,73 +2820,70 @@ def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), // VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. def VMOV_get_imm8 : SDNodeXForm<build_vector, [{ - return ARM::getVMOVImm(N, 1, *CurDAG); + return ARM::getNEONModImm(N, 1, true, *CurDAG); }]>; def vmovImm8 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0; }], VMOV_get_imm8>; // VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm. def VMOV_get_imm16 : SDNodeXForm<build_vector, [{ - return ARM::getVMOVImm(N, 2, *CurDAG); + return ARM::getNEONModImm(N, 2, true, *CurDAG); }]>; def vmovImm16 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0; }], VMOV_get_imm16>; // VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm. 
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{ - return ARM::getVMOVImm(N, 4, *CurDAG); + return ARM::getNEONModImm(N, 4, true, *CurDAG); }]>; def vmovImm32 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0; }], VMOV_get_imm32>; // VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm. def VMOV_get_imm64 : SDNodeXForm<build_vector, [{ - return ARM::getVMOVImm(N, 8, *CurDAG); + return ARM::getNEONModImm(N, 8, true, *CurDAG); }]>; def vmovImm64 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0; }], VMOV_get_imm64>; -// Note: Some of the cmode bits in the following VMOV instructions need to -// be encoded based on the immed values. - let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), - (ins h8imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), - (ins h8imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; -def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), - (ins h16imm:$SIMM), IIC_VMOVImm, +def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; -def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), - (ins h16imm:$SIMM), IIC_VMOVImm, +def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; -def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), - (ins h32imm:$SIMM), IIC_VMOVImm, +def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; -def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), - (ins h32imm:$SIMM), IIC_VMOVImm, +def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), - (ins h64imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), - (ins h64imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; } // isReMaterializable @@ -3122,17 +3105,6 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; -def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), - (INSERT_SUBREG QPR:$src, - (i64 (EXTRACT_SUBREG QPR:$src, - (DSubReg_f64_reg imm:$lane))), - (DSubReg_f64_other_reg imm:$lane))>; -def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)), - (INSERT_SUBREG QPR:$src, - (f64 (EXTRACT_SUBREG QPR:$src, - (DSubReg_f64_reg imm:$lane))), - 
(DSubReg_f64_other_reg imm:$lane))>; - // VMOVN : Vector Narrowing Move defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn", "i", int_arm_neon_vmovn>; @@ -3319,22 +3291,16 @@ let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 - DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 - DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, - DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>; } // hasExtraSrcRegAllocReq = 1 // VTBX : Vector Table Extension @@ -3348,23 +3314,18 @@ let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2 - DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; + "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; + "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", + "$orig = $dst", []>; def VTBX4 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4, "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", - "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; + "$orig = $dst", []>; } // hasExtraSrcRegAllocReq = 1 //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 40f924b679fa..bc0790dccbb5 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -894,11 +894,11 @@ def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, "adr$p\t$dst, #$label", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 +} // neverHasSideEffects def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 -} // neverHasSideEffects //===----------------------------------------------------------------------===// // TLS Instructions @@ -923,18 +923,18 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. 
By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved registers, which is exactly what we want. -// The current SP is passed in $val, and we reuse the reg as a scratch. +// $val is a scratch register for our use. let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in { + [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ], hasSideEffects = 1, + isBarrier = 1 in { def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" - "\tmov\t$val, pc\n" - "\tadds\t$val, #7\n" - "\tstr\t$val, [$src, #4]\n" - "\tmovs\tr0, #0\n" - "\tb\t1f\n" - "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" + "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" + "adds\t$val, #7\n\t" + "str\t$val, [$src, #4]\n\t" + "movs\tr0, #0\n\t" + "b\t1f\n\t" + "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>; } @@ -1037,7 +1037,8 @@ def : T1Pat<(i32 imm0_255_comp:$src), // scheduling. let isReMaterializable = 1 in def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, + "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb1Only]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index b91c089fa5db..4692f2a42133 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -637,8 +637,7 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> { multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, ".w\t$dst, $src", - [(set GPR:$dst, (opnode GPR:$src))]>, - Requires<[HasT2ExtractPack]> { + [(set GPR:$dst, (opnode GPR:$src))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -649,8 +648,7 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, opc, ".w\t$dst, $src, ror $rot", - [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>, - Requires<[HasT2ExtractPack]> { + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0100; let Inst{22-20} = opcod; @@ -661,8 +659,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { } } -// SXTB16 and UXTB16 do not need the .w qualifier. -multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { +// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier. +multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, "\t$dst, $src", [(set GPR:$dst, (opnode GPR:$src))]>, Requires<[HasT2ExtractPack]> { @@ -689,9 +687,9 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { } } -// DO variant - disassembly only, no pattern - -multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> { +// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern +// supported yet.
+multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, "\t$dst, $src", []> { let Inst{31-27} = 0b11111; @@ -787,6 +785,7 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } +} // neverHasSideEffects def t2LEApcrelJT : T2XI<(outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, "adr$p.w\t$dst, #${label}_${id}", []> { @@ -798,7 +797,6 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } -} // neverHasSideEffects // ADD r, sp, {so_imm|i12} def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), @@ -1330,7 +1328,7 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; defm t2SXTH : T2I_unary_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">; +defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">; defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; @@ -1347,13 +1345,13 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; defm t2UXTH : T2I_unary_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16", +defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 24)>; + (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 8)>; + (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -2389,37 +2387,36 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved registers, which is exactly what we want. -// The current SP is passed in $val, and we reuse the reg as a scratch. +// $val is a scratch register for our use.
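The eh.sjlj thunks here implement a two-return contract: the direct path stores a resume address and returns 0 in R0, and the resume path produces 1, the same observable behavior as C setjmp/longjmp. A minimal portable sketch of that contract, using only the standard library (an illustration of the semantics, not the lowered ARM sequence):

#include <csetjmp>
#include <cstdio>

static std::jmp_buf Buf;

static void Resume() {
  std::longjmp(Buf, 1); // transfer back; setjmp now returns 1 ("movs r0, #1")
}

int main() {
  if (setjmp(Buf) == 0) {          // direct call path ("movs r0, #0")
    std::printf("direct path\n");
    Resume();                      // never returns normally
  }
  std::printf("resumed path\n");   // reached via longjmp
  return 0;
}

The Defs list below then marks every callee-saved register as clobbered so the prologue/epilogue code preserves all of them across this point.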
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, - D31 ] in { + D31 ], hasSideEffects = 1, isBarrier = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" - "\tmov\t$val, pc\n" - "\tadds\t$val, #7\n" - "\tstr\t$val, [$src, #4]\n" - "\tmovs\tr0, #0\n" - "\tb\t1f\n" - "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" + "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" + "adds\t$val, #7\n\t" + "str\t$val, [$src, #4]\n\t" + "movs\tr0, #0\n\t" + "b\t1f\n\t" + "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; } let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], + hasSideEffects = 1, isBarrier = 1 in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" - "\tmov\t$val, pc\n" - "\tadds\t$val, #7\n" - "\tstr\t$val, [$src, #4]\n" - "\tmovs\tr0, #0\n" - "\tb\t1f\n" - "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" + "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" + "adds\t$val, #7\n\t" + "str\t$val, [$src, #4]\n\t" + "movs\tr0, #0\n\t" + "b\t1f\n\t" + "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; @@ -2529,6 +2526,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, // IT block +let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), AddrModeNone, Size2Bytes, IIC_iALUx, "it$mask\t$cc", "", []> { @@ -2691,7 +2689,8 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // scheduling. let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, + "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 54474cfe2e29..84c23e1a784c 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -255,25 +255,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. 
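The half-precision conversions adjusted just below operate on the bottom (vcvtb) or top (vcvtt) half-word of an S register. For reference, the widening arithmetic itself fits in a few lines; this is a plain bit-level sketch of IEEE half-to-single conversion, independent of the instruction definitions that follow:

#include <cstdint>
#include <cstdio>
#include <cstring>

static float halfToFloat(uint16_t H) {
  uint32_t Sign = (H & 0x8000u) << 16;
  uint32_t Exp  = (H >> 10) & 0x1f;
  uint32_t Mant = H & 0x3ffu;
  uint32_t Bits;
  if (Exp == 0x1f) {                        // Inf / NaN: max exponent
    Bits = Sign | 0x7f800000u | (Mant << 13);
  } else if (Exp == 0) {
    if (Mant == 0) {                        // signed zero
      Bits = Sign;
    } else {                                // half denormal: renormalize
      Exp = 127 - 15 + 1;
      while (!(Mant & 0x400u)) { Mant <<= 1; --Exp; }
      Mant &= 0x3ffu;
      Bits = Sign | (Exp << 23) | (Mant << 13);
    }
  } else {                                  // normal number: rebias exponent
    Bits = Sign | ((Exp - 15 + 127) << 23) | (Mant << 13);
  }
  float F;
  std::memcpy(&F, &Bits, sizeof F);
  return F;
}

int main() {
  std::printf("%f\n", halfToFloat(0x3c00)); // 1.0
  std::printf("%f\n", halfToFloat(0xc000)); // -2.0
  return 0;
}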
-def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f32_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; -def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f16_to_f32 GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; -def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index ff332b7ee15b..f5d9effeb9a5 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -143,7 +143,8 @@ namespace llvm { JumpTableId2AddrMap[JTI] = Addr; } - /// getPCLabelAddr - Retrieve the address of the PC label of the specified id. + /// getPCLabelAddr - Retrieve the address of the PC label of the + /// specified id. intptr_t getPCLabelAddr(unsigned Id) const { DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id); assert(I != PCLabelMap.end()); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 8585c1e50105..f80e316d23e8 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -74,11 +74,14 @@ namespace { private: struct MemOpQueueEntry { int Offset; + unsigned Reg; + bool isKill; unsigned Position; MachineBasicBlock::iterator MBBI; bool Merged; - MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) - : Offset(o), Position(p), MBBI(i), Merged(false) {} + MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, + MachineBasicBlock::iterator i) + : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -128,30 +131,30 @@ namespace { static int getLoadStoreMultipleOpcode(int Opcode) { switch (Opcode) { case ARM::LDR: - NumLDMGened++; + ++NumLDMGened; return ARM::LDM; case ARM::STR: - NumSTMGened++; + ++NumSTMGened; return ARM::STM; case ARM::t2LDRi8: case ARM::t2LDRi12: - NumLDMGened++; + ++NumLDMGened; return ARM::t2LDM; case ARM::t2STRi8: case ARM::t2STRi12: - NumSTMGened++; + ++NumSTMGened; return ARM::t2STM; case ARM::VLDRS: - NumVLDMGened++; + ++NumVLDMGened; return ARM::VLDMS; case ARM::VSTRS: - NumVSTMGened++; + ++NumVSTMGened; return ARM::VSTMS; case ARM::VLDRD: - NumVLDMGened++; + ++NumVLDMGened; return ARM::VLDMD; case ARM::VSTRD: - NumVSTMGened++; + ++NumVSTMGened; return ARM::VSTMD; default: llvm_unreachable("Unhandled opcode!"); } @@ -264,45 +267,59 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on // success. 
-void ARMLoadStoreOpt:: -MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &memOps, - unsigned memOpsBegin, - unsigned memOpsEnd, - unsigned insertAfter, - int Offset, - unsigned Base, - bool BaseKill, - int Opcode, - ARMCC::CondCodes Pred, - unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVector<MachineBasicBlock::iterator, 4> &Merges) { +void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, + MemOpQueue &memOps, + unsigned memOpsBegin, unsigned memOpsEnd, + unsigned insertAfter, int Offset, + unsigned Base, bool BaseKill, + int Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, + DebugLoc dl, + SmallVector<MachineBasicBlock::iterator, 4> &Merges) { // First calculate which of the registers should be killed by the merged // instruction. - SmallVector<std::pair<unsigned, bool>, 8> Regs; const unsigned insertPos = memOps[insertAfter].Position; + + SmallSet<unsigned, 4> UnavailRegs; + SmallSet<unsigned, 4> KilledRegs; + DenseMap<unsigned, unsigned> Killer; + for (unsigned i = 0; i < memOpsBegin; ++i) { + if (memOps[i].Position < insertPos && memOps[i].isKill) { + unsigned Reg = memOps[i].Reg; + if (memOps[i].Merged) + UnavailRegs.insert(Reg); + else { + KilledRegs.insert(Reg); + Killer[Reg] = i; + } + } + } + for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) { + if (memOps[i].Position < insertPos && memOps[i].isKill) { + unsigned Reg = memOps[i].Reg; + KilledRegs.insert(Reg); + Killer[Reg] = i; + } + } + + SmallVector<std::pair<unsigned, bool>, 8> Regs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - const MachineOperand &MO = memOps[i].MBBI->getOperand(0); - unsigned Reg = MO.getReg(); - bool isKill = MO.isKill(); + unsigned Reg = memOps[i].Reg; + if (UnavailRegs.count(Reg)) + // Register is killed before and it's not easy / possible to update the + // kill marker on already merged instructions. Abort. + return; // If we are inserting the merged operation after an unmerged operation that // uses the same register, make sure to transfer any kill flag. - for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j) - if (memOps[j].Position<insertPos) { - const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); - if (MOJ.getReg() == Reg && MOJ.isKill()) - isKill = true; - } - + bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); } // Try to do the merge. MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; - Loc++; + ++Loc; if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Regs)) return; @@ -311,13 +328,13 @@ MergeOpsUpdate(MachineBasicBlock &MBB, Merges.push_back(prior(Loc)); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { // Remove kill flags from any unmerged memops that come before insertPos. - if (Regs[i-memOpsBegin].second) - for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j) - if (memOps[j].Position<insertPos) { - MachineOperand &MOJ = memOps[j].MBBI->getOperand(0); - if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill()) - MOJ.setIsKill(false); - } + if (Regs[i-memOpsBegin].second) { + unsigned Reg = Regs[i-memOpsBegin].first; + if (KilledRegs.count(Reg)) { + unsigned j = Killer[Reg]; + memOps[j].MBBI->getOperand(0).setIsKill(false); + } + } MBB.erase(memOps[i].MBBI); memOps[i].Merged = true; } @@ -517,8 +534,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } // Try merging with the previous instruction. 
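Before the base-update merging below, note that the kill-flag bookkeeping introduced in the MergeOpsUpdate rewrite above reduces to one invariant: at most one instruction may kill a register, so when the merged LDM/STM becomes the final use, the kill marker moves to it and must be cleared on its previous holder (and if the old killer was already merged, the transformation aborts). A standalone sketch of that invariant with plain standard containers rather than LLVM's types:

#include <cstdio>
#include <map>
#include <set>
#include <vector>

struct MemOp {
  unsigned Reg;
  bool isKill;        // does this op currently hold the kill marker for Reg?
  bool Merged = false;
};

int main() {
  std::vector<MemOp> Ops = {{5, true}, {6, false}, {5, false}};

  // Pass 1: remember which op currently kills each register.
  std::set<unsigned> KilledRegs;
  std::map<unsigned, size_t> Killer;
  for (size_t i = 0; i != Ops.size(); ++i)
    if (Ops[i].isKill) {
      KilledRegs.insert(Ops[i].Reg);
      Killer[Ops[i].Reg] = i;
    }

  // Pass 2: "merge" op 2 (reg 5). It becomes the last use of reg 5, so it
  // inherits the kill marker and the old killer (op 0) must lose it.
  MemOp &Merged = Ops[2];
  Merged.Merged = true;
  if (KilledRegs.count(Merged.Reg)) {
    Merged.isKill = true;
    Ops[Killer[Merged.Reg]].isKill = false;
  }

  for (const MemOp &Op : Ops)
    std::printf("r%u kill=%d merged=%d\n", Op.Reg, Op.isKill, Op.Merged);
  return 0;
}

The surrounding hunks then extend the prev/next scans to step over debug values, so DBG_VALUE instructions cannot block a merge.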
- if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator BeginMBBI = MBB.begin(); + if (MBBI != BeginMBBI) { MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) + --PrevMBBI; if (isAM4) { if (Mode == ARM_AM::ia && isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) { @@ -541,8 +561,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } // Try merging with the next instruction. - if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator EndMBBI = MBB.end(); + if (!DoMerge && MBBI != EndMBBI) { MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) + ++NextMBBI; if (isAM4) { if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) && isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) { @@ -669,8 +692,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100); // Try merging with the previous instruction. - if (MBBI != MBB.begin()) { + MachineBasicBlock::iterator BeginMBBI = MBB.begin(); + if (MBBI != BeginMBBI) { MachineBasicBlock::iterator PrevMBBI = prior(MBBI); + while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) + --PrevMBBI; if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; AddSub = ARM_AM::sub; @@ -685,8 +711,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, } // Try merging with the next instruction. - if (!DoMerge && MBBI != MBB.end()) { + MachineBasicBlock::iterator EndMBBI = MBB.end(); + if (!DoMerge && MBBI != EndMBBI) { MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI); + while (NextMBBI != EndMBBI && NextMBBI->isDebugValue()) + ++NextMBBI; if (!isAM5 && isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) { DoMerge = true; @@ -759,18 +788,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, /// isMemoryOp - Returns true if instruction is a memory operations (that this /// pass is capable of operating on). static bool isMemoryOp(const MachineInstr *MI) { - if (MI->hasOneMemOperand()) { - const MachineMemOperand *MMO = *MI->memoperands_begin(); + // When no memory operands are present, conservatively assume unaligned, + // volatile, unfoldable. + if (!MI->hasOneMemOperand()) + return false; - // Don't touch volatile memory accesses - we may be changing their order. - if (MMO->isVolatile()) - return false; + const MachineMemOperand *MMO = *MI->memoperands_begin(); - // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is - // not. - if (MMO->getAlignment() < 4) - return false; - } + // Don't touch volatile memory accesses - we may be changing their order. + if (MMO->isVolatile()) + return false; + + // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is + // not. + if (MMO->getAlignment() < 4) + return false; // str <undef> could probably be eliminated entirely, but for now we just want // to avoid making a mess of it. @@ -898,6 +930,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) return false; + MachineBasicBlock::iterator NewBBI = MBBI; bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8; bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8; bool EvenDeadKill = isLd ? 
@@ -942,6 +975,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } + NewBBI = llvm::prior(MBBI); } else { // Split into two instructions. assert((!isT2 || !OffReg) && @@ -962,14 +996,15 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, OffReg, false, OffUndef, Pred, PredReg, TII, isT2); + NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, Pred, PredReg, TII, isT2); } else { if (OddReg == EvenReg && EvenDeadKill) { - // If the two source operands are the same, the kill marker is probably - // on the first one. e.g. + // If the two source operands are the same, the kill marker is + // probably on the first one. e.g. // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 EvenDeadKill = false; OddDeadKill = true; @@ -978,6 +1013,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, OffReg, false, OffUndef, Pred, PredReg, TII, isT2); + NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, @@ -989,8 +1025,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, ++NumSTRD2STR; } - MBBI = prior(MBBI); MBB.erase(MI); + MBBI = NewBBI; + return true; } return false; } @@ -1023,6 +1060,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (isMemOp) { int Opcode = MBBI->getOpcode(); unsigned Size = getLSMultipleTransferSize(MBBI); + const MachineOperand &MO = MBBI->getOperand(0); + unsigned Reg = MO.getReg(); + bool isKill = MO.isDef() ? false : MO.isKill(); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); @@ -1044,8 +1084,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); + ++NumMemOps; Advance = true; } else { if (Clobber) { @@ -1057,15 +1097,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; } else { for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { if (Offset < I->Offset) { - MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; break; } else if (Offset == I->Offset) { @@ -1078,7 +1120,12 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { } } - if (Advance) { + if (MBBI->isDebugValue()) { + ++MBBI; + if (MBBI == E) + // Reach the end of the block, try merging the memory instructions. + TryMerge = true; + } else if (Advance) { ++Position; ++MBBI; if (MBBI == E) @@ -1279,7 +1326,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, // some day. 
SmallSet<unsigned, 4> AddedRegPressure; while (++I != E) { - if (MemOps.count(&*I)) + if (I->isDebugValue() || MemOps.count(&*I)) continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) @@ -1411,7 +1458,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, std::sort(Ops.begin(), Ops.end(), OffsetCompare()); // The loads / stores of the same base are in order. Scan them from first to - // last and check for the followins: + // last and check for the following: // 1. Any def of base. // 2. Any gaps. while (Ops.size() > 1) { @@ -1474,7 +1521,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; - while (InsertPos != MBB->end() && MemOps.count(InsertPos)) + while (InsertPos != MBB->end() + && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) ++InsertPos; // If we are moving a pair of loads / stores, see if it makes sense @@ -1562,7 +1610,9 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { break; } - MI2LocMap[MI] = Loc++; + if (!MI->isDebugValue()) + MI2LocMap[MI] = ++Loc; + if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 013427635592..7e57a1ca5576 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -88,6 +88,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// VarArgsFrameIndex - FrameIndex for start of varargs area. int VarArgsFrameIndex; + /// HasITBlocks - True if IT blocks have been inserted. + bool HasITBlocks; + public: ARMFunctionInfo() : isThumb(false), @@ -97,7 +100,8 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), + HasITBlocks(false) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), @@ -108,7 +112,8 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), + HasITBlocks(false) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -229,6 +234,9 @@ public: int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + bool hasITBlocks() const { return HasITBlocks; } + void setHasITBlocks(bool h) { HasITBlocks = h; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 6beca8b9199b..d020f3c74bde 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -153,11 +153,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Pseudo 256-bit registers to represent pairs of Q registers. These should // never be present in the emitted code. -// These are used for NEON load / store instructions, e.g. vld4, vst3. 
-// NOTE: It's possible to define more QQ registers since technical the -// starting D register number doesn't have to be multiple of 4. e.g. -// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register -// stuffs very messy. +// These are used for NEON load / store instructions, e.g., vld4, vst3. +// NOTE: It's possible to define more QQ registers since technically the +// starting D register number doesn't have to be multiple of 4, e.g., +// D1, D2, D3, D4 would be a legal quad, but that would make the subregister +// stuff very messy. let SubRegIndices = [qsub_0, qsub_1] in { let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1), (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1), @@ -183,7 +183,8 @@ let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), (ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1), (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3), (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5), - (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in { + (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in +{ def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; } @@ -196,9 +197,9 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; } // Current Program Status Register. -def CPSR : ARMReg<0, "cpsr">; - -def FPSCR : ARMReg<1, "fpscr">; +def CPSR : ARMReg<0, "cpsr">; +def FPSCR : ARMReg<1, "fpscr">; +def ITSTATE : ARMReg<2, "itstate">; // Register classes. // @@ -348,6 +349,73 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { }]; } +// For tail calls, we can't use callee-saved registers, as they are restored +// to the saved value before the tail call, which would clobber a call address. +// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of +// this class and the preceding one(!) This is what we want. +def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // R9 is available. + static const unsigned ARM_GPR_R9_TC[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R9, ARM::R12 }; + // R9 is not available. + static const unsigned ARM_GPR_NOR9_TC[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R12 }; + + // For Thumb1 mode, we don't want to allocate hi regs at all, as we + // don't know how to spill them. If we make our prologue/epilogue code + // smarter at some point, we can go back to using the above allocation + // orders for the Thumb1 instructions that know how to use hi regs. + static const unsigned THUMB_GPR_AO_TC[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + + tcGPRClass::iterator + tcGPRClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO_TC; + if (Subtarget.isTargetDarwin()) { + if (Subtarget.isR9Reserved()) + return ARM_GPR_NOR9_TC; + else + return ARM_GPR_R9_TC; + } else + // R9 is either callee-saved or reserved; can't use it. 
+ return ARM_GPR_NOR9_TC; + } + + tcGPRClass::iterator + tcGPRClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>(); + GPRClass::iterator I; + + if (Subtarget.isThumb1Only()) { + I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); + return I; + } + + if (Subtarget.isTargetDarwin()) { + if (Subtarget.isR9Reserved()) + I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); + else + I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)); + } else + // R9 is either callee-saved or reserved; can't use it. + I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); + return I; + } + }]; +} + + // Scalar single precision floating point register class. def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, @@ -479,4 +547,3 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; - diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index bbfc0b2b4744..282abca98803 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -1,10 +1,10 @@ //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the ARM Cortex A8 processors. @@ -32,50 +32,50 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, // // Binary Instructions that produce a result - InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, - InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, - InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, + InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, // // Unary Instructions that produce a result - InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // // Compare instructions - InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, - InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, - InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, + 
InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // // Move instructions, unconditional - InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, - InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, - InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, + InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, // // Move instructions, conditional - InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, - InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, - InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, + InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, // Integer multiply pipeline // Result written in E5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases // InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>, - InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>, InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>, InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, - InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>, InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>, InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, + InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>, InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, - + // Integer load pipeline // // loads have an extra cycle of latency, but are fully pipelined @@ -166,7 +166,7 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<2, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0]>]>, - + // Branch // // no delay slots, so the latency of a branch is unimportant @@ -276,14 +276,14 @@ def CortexA8Itineraries : ProcessorItineraries< // // Single-precision FP Load // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Load // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, + InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0], 0>, InstrStage<1, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -292,7 +292,7 @@ def CortexA8Itineraries : 
ProcessorItineraries< // // FP Load Multiple // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, + InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>, InstrStage<2, [A8_Pipe0], 0>, InstrStage<2, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -301,14 +301,14 @@ def CortexA8Itineraries : ProcessorItineraries< // // Single-precision FP Store // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Store // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, + InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0], 0>, InstrStage<1, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -317,7 +317,7 @@ def CortexA8Itineraries : ProcessorItineraries< // // FP Store Multiple // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, + InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>, InstrStage<2, [A8_Pipe0], 0>, InstrStage<2, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -329,35 +329,35 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD1 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, // // VLD2 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, // // VLD3 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, // // VLD4 // FIXME: We don't model this instruction properly - InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, // // VST // FIXME: We don't model this instruction properly - InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, + InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, @@ -600,7 +600,7 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, - InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>, // // VTBX InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -610,9 +610,9 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, - InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>, InstrItinData<IIC_VTBX4, 
[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, - InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> + InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 75320d929952..df2f896a8d4b 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1,10 +1,10 @@ //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the ARM Cortex A9 processors. @@ -16,7 +16,6 @@ // Reference Manual". // // Functional units -def A9_Issue : FuncUnit; // issue def A9_Pipe0 : FuncUnit; // pipeline 0 def A9_Pipe1 : FuncUnit; // pipeline 1 def A9_LSPipe : FuncUnit; // LS pipe @@ -27,7 +26,121 @@ def A9_DRegsN : FuncUnit; // FP register set, NEON side // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1 // def CortexA9Itineraries : ProcessorItineraries< - [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [ + [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [ + // Two fully-pipelined integer ALU pipelines + // FIXME: There are no operand latencies for these instructions at all! + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>, + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>, + // + // Unary Instructions that produce a result + InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>, + // + // Move instructions, conditional + InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>, + + // Integer multiply pipeline + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, + 
InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, + InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>, + InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>, + InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, + // Integer load pipeline + // FIXME: The timings are some rough approximations + // + // Immediate offset + InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>, + // + // Load multiple + InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>]>, + + // Integer store pipeline + /// + // Immediate offset + InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [3, 1]>, + // + // Register offset + InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>, + // + // Scaled register offset + InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>, + // + // Scaled register offset with update + InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>, + // + // Store multiple + InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>, + InstrStage<1, [A9_LSPipe]>]>, + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>, + // VFP and NEON shares the same register file. This means that every VFP // instruction should wait for full completion of the consecutive NEON // instruction and vice-versa. We model this behavior with two artificial FUs: @@ -39,8 +152,8 @@ def CortexA9Itineraries : ProcessorItineraries< // register file writeback!). // Every NEON instruction does the same but with FUs swapped. // - // Since the reserved FU cannot be acquired this models precisly "cross-domain" - // stalls. + // Since the reserved FU cannot be acquired, this models precisely + // "cross-domain" stalls. // VFP // Issue through integer pipeline, and execute in NEON unit. 
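The effect of those Reserved writeback stages can be checked with a toy scoreboard: an op requires its own domain's register-file unit and reserves the other domain's until writeback completes, so an op from the opposite domain cannot issue inside that window. A small self-contained simulation of the idea (op names and cycle counts are illustrative, not taken from the A9 tables):

#include <algorithm>
#include <cstdio>

int main() {
  // Cycle at which each domain's register-file FU frees up:
  // index 0 = VFP side, index 1 = NEON side.
  unsigned FreeAt[2] = {0, 0};

  struct Op { const char *Name; unsigned Domain; unsigned WbCycles; };
  const Op Trace[] = {{"vadd.f32 (VFP)", 0, 5}, {"vadd.i32 (NEON)", 1, 7},
                      {"vmul.f32 (VFP)", 0, 6}};

  unsigned Cycle = 0;
  for (const Op &O : Trace) {
    // Required FU: the op's own domain, which may still be reserved by an
    // earlier op from the other domain.
    unsigned Issue = std::max(Cycle, FreeAt[O.Domain]);
    std::printf("%-16s issues at cycle %u\n", O.Name, Issue);
    FreeAt[1 - O.Domain] = Issue + O.WbCycles; // reserve the opposite side
    FreeAt[O.Domain] = Issue + 1;              // own FU is pipelined
    Cycle = Issue + 1;
  }
  return 0;
}

Running this, the NEON add stalls until the VFP add's writeback window ends, and the following VFP multiply stalls on the NEON op's reservation in turn: exactly the back-and-forth the two artificial FUs are meant to model.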
@@ -48,21 +161,21 @@ def CortexA9Itineraries : ProcessorItineraries< // FP Special Register to Integer Register File Move InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>]>, // // Single-precision FP Unary InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra latency cycles since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision FP Unary InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra latency cycles since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // @@ -70,124 +183,124 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra latency cycles since wbck is 4 cycles InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision FP Compare InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra latency cycles since wbck is 4 cycles InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Single to Double FP Convert InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double to Single FP Convert InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Single to Half FP Convert InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Half to Single FP Convert InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Single-Precision FP to Integer Convert InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double-Precision FP to Integer Convert InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Integer to Single-Precision FP Convert InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Integer to Double-Precision FP Convert InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + 
InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Single-precision FP ALU InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Double-precision FP ALU InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Single-precision FP Multiply InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<6, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [5, 1, 1]>, // // Double-precision FP Multiply InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<7, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 1, 1]>, // // Single-precision FP MAC InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<9, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>, // // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<10, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>, // // Single-precision FP DIV InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<16, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<10, [A9_NPipe]>], [15, 1, 1]>, // // Double-precision FP DIV InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<26, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<20, [A9_NPipe]>], [25, 1, 1]>, // // Single-precision FP SQRT InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<18, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<13, [A9_NPipe]>], [17, 1]>, + InstrStage<1, [A9_Pipe1]>, + InstrStage<13, [A9_NPipe]>], [17, 1]>, // // Double-precision FP SQRT InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<33, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<28, [A9_NPipe]>], [32, 1]>, // @@ -195,92 +308,79 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra 1 latency cycle since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Integer to Double-precision Move InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>, // Extra 1 latency cycle since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, // // Single-precision to Integer Move InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // 
Double-precision to Integer Move InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, // // Single-precision FP Load - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Load - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // FP Load Multiple - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Single-precision FP Store - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Store - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // FP Store Multiple - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // NEON // Issue through integer pipeline, and execute in NEON unit. - // FIXME: Neon pipeline and LdSt unit are multiplexed. + // FIXME: Neon pipeline and LdSt unit are multiplexed. // Add some syntactic sugar to model this! 
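One reading aid for the entries here: the trailing integer list of each InstrItinData gives per-operand cycles, one entry per operand in order, where the first value is the cycle at which the result is written and the rest are the cycles at which each source is read (e.g. [2, 2, 1] on VLD2 below, or [4, 1, 1, 2] on the A9 IIC_iMAC32 above). A simplified sketch of how such a list yields a dependence latency (the clamp-to-one rule is an assumption for illustration, not LLVM's exact computation):

#include <cstdio>
#include <vector>

// Hedged model: latency of a def feeding a use is the def's write cycle
// minus the use's read cycle, clamped to at least one cycle.
static int depLatency(const std::vector<int> &Def, unsigned DefIdx,
                      const std::vector<int> &Use, unsigned UseIdx) {
  int L = Def[DefIdx] - Use[UseIdx];
  return L < 1 ? 1 : L;
}

int main() {
  std::vector<int> MAC32 = {4, 1, 1, 2}; // A9 IIC_iMAC32 operand cycles
  // MAC result feeding the next MAC's accumulator (operand 3, read at 2):
  std::printf("MAC -> MAC accumulator: %d cycles\n",
              depLatency(MAC32, 0, MAC32, 3)); // 4 - 2 = 2
  // The same result feeding a multiplicand (operand 1, read at cycle 1):
  std::printf("MAC -> MAC multiplicand: %d cycles\n",
              depLatency(MAC32, 0, MAC32, 1)); // 4 - 1 = 3
  return 0;
}

This is why multiply-accumulate chains tolerate back-to-back scheduling better when the dependence is through the accumulator operand than through a multiplicand.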
// VLD1 // FIXME: We don't model this instruction properly InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>, InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // VLD2 @@ -288,9 +388,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, // // VLD3 @@ -298,9 +397,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>, // // VLD4 @@ -308,9 +406,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>, // // VST @@ -318,121 +415,120 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Double-register Integer Unary InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2]>, // // Quad-register Integer Unary InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2]>, // // Double-register Integer Q-Unary InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Quad-register Integer CountQ-Unary InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double-register Integer Binary InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, 
A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Binary InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Double-register Integer Subtract InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, // // Quad-register Integer Subtract InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, // // Double-register Integer Shift InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, // // Quad-register Integer Shift InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, // // Double-register Integer Shift (4 cycle) InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Quad-register Integer Shift (4 cycle) InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Double-register Integer Binary (4 cycle) InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, // // Quad-register Integer Binary (4 cycle) InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, // // Double-register Integer Subtract (4 cycle) InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, // // Quad-register Integer Subtract (4 cycle) InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, // @@ -440,7 +536,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Count @@ -449,35 +545,35 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [4, 2, 2]>, // // Double-register Absolute Difference and Accumulate InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>, // // Quad-register Absolute Difference and Accumulate InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, // // Double-register Integer Pair Add Long InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 3, 1]>, // // Quad-register Integer Pair Add Long InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 3, 1]>, // @@ -485,14 +581,14 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 2, 2]>, // // Quad-register Integer Multiply (.8, .16) InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 2, 2]>, // @@ -500,56 +596,56 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 2, 1]>, // // Quad-register Integer Multiply (.32) InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [9, 2, 1]>, // // Double-register Integer Multiply-Accumulate (.8, .16) InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>, // // 
Double-register Integer Multiply-Accumulate (.32) InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>, // // Quad-register Integer Multiply-Accumulate (.8, .16) InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>, // // Quad-register Integer Multiply-Accumulate (.32) InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>, // // Move Immediate InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3]>, // // Double-register Permute Move InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_LSPipe]>], [2, 1]>, // // Quad-register Permute Move @@ -558,42 +654,42 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<4, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1]>, // // Integer to Single-precision Move InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Integer to Double-precision Move InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, // // Single-precision to Integer Move InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Double-precision to Integer Move InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, // // Integer to Lane Move InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>, // FIXME: all latencies are arbitrary, no information is available InstrStage<4, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, 
[A9_NPipe]>], [3, 1, 1]>, // @@ -601,7 +697,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [5, 2]>, // // Quad-register FP Unary @@ -610,7 +706,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 2]>, // // Double-register FP Binary @@ -619,7 +715,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [5, 2, 2]>, // // Quad-register FP Binary @@ -630,14 +726,14 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, // // Double-register FP Multiply-Accumulate InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, // // Quad-register FP Multiply-Accumulate @@ -646,28 +742,28 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, // // Double-register Reciprocal Step InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, // // Quad-register Reciprocal Step InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [8, 2, 2]>, // // Double-register Permute InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>, // // Quad-register Permute @@ -676,7 +772,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>, // // Quad-register Permute (3 cycle issue) @@ -685,7 +781,7 @@ InstrItinData<IIC_VPERMQ3,
[InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>, // @@ -693,57 +789,57 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, // // Quad-register VEXT InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 9 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, // // VTB InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 2, 1]>, InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>, InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>, InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>, // // VTBX InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>, InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>, InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>, InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> + InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index f813022d8741..08b560cc0c2f 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -16,7 +16,7 @@ // Functional Units def V6_Pipe : FuncUnit; // pipeline -// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual". 
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual" // def ARMV6Itineraries : ProcessorItineraries< [V6_Pipe], [ diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index b4a9252909bc..09203f9304df 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -60,8 +60,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? - std::string("e-p:32:32-f64:32:32-i64:32:32-n32") : - std::string("e-p:32:32-f64:64:64-i64:64:64-n32")), + std::string("e-p:32:32-f64:32:32-i64:32:32-" + "v128:32:128-v64:32:64-n32") : + std::string("e-p:32:32-f64:64:64-i64:64:64-" + "v128:64:128-v64:64:64-n32")), TLInfo(*this), TSInfo(*this) { } @@ -74,9 +76,11 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") : + "i16:16:32-i8:8:32-i1:8:32-" + "v128:32:128-v64:32:64-a:0:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")), + "i16:16:32-i8:8:32-i1:8:32-" + "v128:64:128-v64:64:64-a:0:32-n32")), TLInfo(*this), TSInfo(*this) { } @@ -98,6 +102,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); + return true; } @@ -115,21 +120,20 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, // proper scheduling. PM.add(createARMExpandPseudoPass()); - return true; -} - -bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); } - - if (Subtarget.isThumb2()) { + if (Subtarget.isThumb2()) PM.add(createThumb2ITBlockPass()); + + return true; +} + +bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (Subtarget.isThumb2()) PM.add(createThumb2SizeReductionPass()); - } PM.add(createARMConstantIslandPass()); return true; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bfa89c4a4696..8415d1ad8827 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -425,7 +425,7 @@ bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) { const AsmToken &NextTok = Parser.getTok(); if (NextTok.isNot(AsmToken::EndOfStatement)) { if (NextTok.isNot(AsmToken::Comma)) - return Error(NextTok.getLoc(), "',' expected"); + return Error(NextTok.getLoc(), "',' expected"); Parser.Lex(); // Eat comma token. 
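// The operand after the comma may be an immediate offset, a plain offset
// register, or an offset register with a shift; ParseMemoryOffsetReg below
// distinguishes those forms and fills in the corresponding out-parameters.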
if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, @@ -488,7 +488,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, const AsmToken &Tok = Parser.getTok(); if (ParseShift(ShiftType, ShiftAmount, E)) - return Error(Tok.getLoc(), "shift expected"); + return Error(Tok.getLoc(), "shift expected"); OffsetRegShifted = true; } } @@ -665,7 +665,6 @@ bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.push_back(Op.take()); - SMLoc Loc = Parser.getTok().getLoc(); if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. @@ -763,15 +762,10 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { if (Tok.isNot(AsmToken::Identifier)) return Error(L, "unexpected token in .syntax directive"); const StringRef &Mode = Tok.getString(); - bool unified_syntax; - if (Mode == "unified" || Mode == "UNIFIED") { + if (Mode == "unified" || Mode == "UNIFIED") Parser.Lex(); - unified_syntax = true; - } - else if (Mode == "divided" || Mode == "DIVIDED") { + else if (Mode == "divided" || Mode == "DIVIDED") Parser.Lex(); - unified_syntax = false; - } else return Error(L, "unrecognized syntax mode in .syntax directive"); @@ -791,15 +785,10 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { if (Tok.isNot(AsmToken::Integer)) return Error(L, "unexpected token in .code directive"); int64_t Val = Parser.getTok().getIntVal(); - bool thumb_mode; - if (Val == 16) { + if (Val == 16) Parser.Lex(); - thumb_mode = true; - } - else if (Val == 32) { + else if (Val == 32) Parser.Lex(); - thumb_mode = false; - } else return Error(L, "invalid operand to .code directive"); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index d95efdb80943..6a40cf3602e9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -175,23 +175,8 @@ namespace { raw_ostream &O); void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); - - void printHex8ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); - } - void printHex16ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); - } - void printHex32ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); - } - void printHex64ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); - } + void printNEONModImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, @@ -322,7 +307,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0); unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1); O << '{' - << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) + << getRegisterName(DRegLo) << ", " << getRegisterName(DRegHi) << '}'; } else if (Modifier && strcmp(Modifier, "lane") == 0) { unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); @@ -617,8 +602,12 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op, O << "[" << getRegisterName(MO1.getReg()); if (MO2.getImm()) { + unsigned Align = MO2.getImm(); + assert((Align == 8 || 
Align == 16 || Align == 32) && + "unexpected NEON load/store alignment"); + Align <<= 3; // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO2.getImm(); + O << ", :" << Align; } O << "]"; } @@ -1039,6 +1028,14 @@ void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum, } } +void ARMAsmPrinter::printNEONModImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + unsigned EncodedImm = MI->getOperand(OpNum).getImm(); + unsigned EltBits; + uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); + O << "#0x" << utohexstr(Val); +} + bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { @@ -1064,20 +1061,10 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, printOperand(MI, OpNum, O); return false; case 'Q': - if (TM.getTargetData()->isLittleEndian()) - break; - // Fallthrough case 'R': - if (TM.getTargetData()->isBigEndian()) - break; - // Fallthrough - case 'H': // Write second word of DI / DF reference. - // Verify that this operand has two consecutive registers. - if (!MI->getOperand(OpNum).isReg() || - OpNum+1 == MI->getNumOperands() || - !MI->getOperand(OpNum+1).isReg()) - return true; - ++OpNum; // Return the high-part. + case 'H': + report_fatal_error("llvm does not support 'Q', 'R', and 'H' modifiers!"); + return true; } } @@ -1384,11 +1371,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { } else if (MO.isGlobal()) { MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO); const MCSymbolRefExpr *SymRef1 = - MCSymbolRefExpr::Create(Symbol, - MCSymbolRefExpr::VK_ARM_LO16, OutContext); + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_LO16, OutContext); const MCSymbolRefExpr *SymRef2 = - MCSymbolRefExpr::Create(Symbol, - MCSymbolRefExpr::VK_ARM_HI16, OutContext); + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_HI16, OutContext); V1 = MCOperand::CreateExpr(SymRef1); V2 = MCOperand::CreateExpr(SymRef2); } else { diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 2b94b76087e7..170819ad4f06 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -779,22 +779,10 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, O << '#' << MI->getOperand(OpNum).getImm(); } -void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); -} - -void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); -} - -void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); -} - -void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); +void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned EncodedImm = MI->getOperand(OpNum).getImm(); + unsigned EltBits; + uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); + O << "#0x" << utohexstr(Val); } diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index be0b7c1eb027..ddf5047793d2 100644 --- 
a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -104,10 +104,7 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); // FIXME: Implement. diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 29e66e13b168..0df34666b959 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_target(ARMCodeGen NEONPreAllocPass.cpp Thumb1InstrInfo.cpp Thumb1RegisterInfo.cpp + Thumb2HazardRecognizer.cpp Thumb2ITBlockPass.cpp Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index adb7795a746c..a07ff2832aa7 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -34,7 +34,7 @@ /// Uses and Defs by this instr. For the Uses part, the pred:$p operand is /// defined with two components: /// -/// def pred { // Operand PredicateOperand +/// def pred { // Operand PredicateOperand /// ValueType Type = OtherVT; /// string PrintMethod = "printPredicateOperand"; /// string AsmOperandLowerMethod = ?; @@ -54,7 +54,7 @@ /// /// For the Defs part, in the simple case of only cc_out:$s, we have: /// -/// def cc_out { // Operand OptionalDefOperand +/// def cc_out { // Operand OptionalDefOperand /// ValueType Type = OtherVT; /// string PrintMethod = "printSBitModifierOperand"; /// string AsmOperandLowerMethod = ?; @@ -765,7 +765,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, || Opcode == ARM::SMC || Opcode == ARM::SVC) && "Unexpected Opcode"); - assert(NumOps >= 1 && OpInfo[0].RegClass == 0 && "Reg operand expected"); + assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected"); int Imm32 = 0; if (Opcode == ARM::SMC) { @@ -1106,7 +1106,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+2].RegClass == 0) && + (OpInfo[OpIdx+2].RegClass < 0) && "Expect 3 reg operands"); // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. @@ -1201,7 +1201,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass == 0) && + (OpInfo[OpIdx+1].RegClass < 0) && "Expect 1 reg operand followed by 1 imm operand"); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? 
ARM_AM::add : ARM_AM::sub; @@ -1323,7 +1323,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass == 0) && + (OpInfo[OpIdx+1].RegClass < 0) && "Expect 1 reg operand followed by 1 imm operand"); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; @@ -1494,7 +1494,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // If there is still an operand info left which is an immediate operand, add // an additional imm5 LSL/ASR operand. - if (ThreeReg && OpInfo[OpIdx].RegClass == 0 + if (ThreeReg && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Extract the 5-bit immediate field Inst{11-7}. unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F; @@ -1540,7 +1540,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // If there is still an operand info left which is an immediate operand, add // an additional rotate immediate operand. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Extract the 2-bit rotate field Inst{11-10}. unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3; @@ -1725,7 +1725,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, "Tied to operand expected"); MI.addOperand(MI.getOperand(0)); - assert(OpInfo[2].RegClass == 0 && !OpInfo[2].isPredicate() && + assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && !OpInfo[2].isOptionalDef() && "Imm operand expected"); MI.addOperand(MCOperand::CreateImm(fbits)); @@ -1984,7 +1984,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // Extract/decode the f64/f32 immediate. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // The asm syntax specifies the before-expanded <imm>. // Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0), @@ -2077,42 +2077,12 @@ static unsigned decodeLaneIndex(uint32_t insn) { // imm3 = Inst{18-16}, imm4 = Inst{3-0} // Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions. static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { + unsigned char op = (insn >> 5) & 1; unsigned char cmode = (insn >> 8) & 0xF; unsigned char Imm8 = ((insn >> 24) & 1) << 7 | ((insn >> 16) & 7) << 4 | (insn & 0xF); - uint64_t Imm64 = 0; - - switch (esize) { - case ESize8: - Imm64 = Imm8; - break; - case ESize16: - Imm64 = Imm8 << 8*(cmode >> 1 & 1); - break; - case ESize32: { - if (cmode == 12) - Imm64 = (Imm8 << 8) | 0xFF; - else if (cmode == 13) - Imm64 = (Imm8 << 16) | 0xFFFF; - else { - // Imm8 to be shifted left by how many bytes... 
- Imm64 = Imm8 << 8*(cmode >> 1 & 3); - } - break; - } - case ESize64: { - for (unsigned i = 0; i < 8; ++i) - if ((Imm8 >> i) & 1) - Imm64 |= (uint64_t)0xFF << 8*i; - break; - } - default: - assert(0 && "Unreachable code!"); - return 0; - } - - return Imm64; + return (op << 12) | (cmode << 8) | Imm8; } // A8.6.339 VMUL, VMULL (by scalar) @@ -2303,7 +2273,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); + OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? @@ -2320,7 +2290,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); RegClass = OpInfo[OpIdx].RegClass; - while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { + while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( getRegisterEnum(B, RegClass, Rd, UseDRegPair(Opcode)))); @@ -2329,7 +2299,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } // Handle possible lane index. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn))); ++OpIdx; @@ -2340,7 +2310,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // possible TIED_TO DPR/QPR's (ignored), then possible lane index. RegClass = OpInfo[0].RegClass; - while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { + while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( getRegisterEnum(B, RegClass, Rd, UseDRegPair(Opcode)))); @@ -2355,7 +2325,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); + OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? @@ -2366,7 +2336,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; } - while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { + while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 && "Tied to operand expected"); MI.addOperand(MCOperand::CreateReg(0)); @@ -2374,7 +2344,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } // Handle possible lane index. 
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn))); ++OpIdx; @@ -2438,7 +2408,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, assert(NumOps >= 2 && (OpInfo[0].RegClass == ARM::DPRRegClassID || OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass == 0) && + (OpInfo[1].RegClass < 0) && "Expect 1 reg operand followed by 1 imm operand"); // Qd/Dd = Inst{22:15-12} => NEON Rd @@ -2552,7 +2522,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Add the imm operand, if required. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { unsigned imm = 0xFFFFFFFF; @@ -2632,7 +2602,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, decodeNEONRm(insn)))); ++OpIdx; - assert(OpInfo[OpIdx].RegClass == 0 && "Imm operand expected"); + assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected"); // Add the imm operand. @@ -2762,7 +2732,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, getRegisterEnum(B, OpInfo[OpIdx].RegClass, m))); ++OpIdx; - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Add the imm operand. unsigned Imm = 0; @@ -2869,15 +2839,9 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } -static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - assert(0 && "Unreachable code!"); - return false; -} - // Vector Get Lane (move scalar to ARM core register) Instructions. // VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index -static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; @@ -2887,7 +2851,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && - OpInfo[2].RegClass == 0 && + OpInfo[2].RegClass < 0 && "Expect >= 3 operands with one dst operand"); ElemSize esize = @@ -2911,7 +2875,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Set Lane (move ARM core register to scalar) Instructions. 
// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index -static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; @@ -2923,7 +2887,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpInfo[1].RegClass == ARM::DPRRegClassID && TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && OpInfo[2].RegClass == ARM::GPRRegClassID && - OpInfo[3].RegClass == 0 && + OpInfo[3].RegClass < 0 && "Expect >= 3 operands with one dst operand"); ElemSize esize = @@ -2950,7 +2914,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Duplicate Instructions (from ARM core register to all elements). // VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt -static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -3090,13 +3054,6 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; } -static bool DisassembleThumbMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - - assert(0 && "Unexpected thumb misc. instruction!"); - return false; -} - /// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP. /// We divide the disassembly task into different categories, with each one /// corresponding to a specific instruction encoding format. There could be @@ -3128,12 +3085,10 @@ static const DisassembleFP FuncPtrs[] = { &DisassembleVFPLdStMulFrm, &DisassembleVFPMiscFrm, &DisassembleThumbFrm, - &DisassembleNEONFrm, - &DisassembleNEONGetLnFrm, - &DisassembleNEONSetLnFrm, - &DisassembleNEONDupFrm, &DisassembleMiscFrm, - &DisassembleThumbMiscFrm, + &DisassembleNGetLnFrm, + &DisassembleNSetLnFrm, + &DisassembleNDupFrm, // VLD and VST (including one lane) Instructions. &DisassembleNLdSt, @@ -3233,7 +3188,8 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, // a pair of TargetOperandInfos with isPredicate() property. if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + OpInfo[Idx].RegClass < 0 && + OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) { // If we are inside an IT block, get the IT condition bits maintained via // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). @@ -3265,7 +3221,8 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // a pair of TargetOperandInfos with isPredicate() property. if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + OpInfo[Idx].RegClass < 0 && + OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) { // If we are inside an IT block, get the IT condition bits maintained via // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). 
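Taken together, the decodeN1VImm change above and the printNEONModImmOperand printer from earlier in this patch split the work in two: the disassembler now hands back the packed modified-immediate encoding (bit 12 = Op, bits 11-8 = Cmode, bits 7-0 = Imm8, per its return expression (op << 12) | (cmode << 8) | Imm8), and the printer expands it to an actual value only at print time via ARM_AM::decodeNEONModImm. A minimal round-trip sketch of that packed layout; packModImm and the three accessors are illustrative names, not the in-tree API:

#include <cassert>

// Pack the fields the same way decodeN1VImm now returns them:
// bit 12 = Op, bits 11-8 = Cmode, bits 7-0 = Imm8.
static unsigned packModImm(unsigned Op, unsigned Cmode, unsigned Imm8) {
  return (Op << 12) | (Cmode << 8) | Imm8;
}

static unsigned modImmOp(unsigned ModImm)    { return (ModImm >> 12) & 1; }
static unsigned modImmCmode(unsigned ModImm) { return (ModImm >> 8) & 0xf; }
static unsigned modImmVal(unsigned ModImm)   { return ModImm & 0xff; }

int main() {
  // e.g. a VMOV-style immediate with Op = 0, Cmode = 0xe, Imm8 = 0x42.
  unsigned Enc = packModImm(0, 0xe, 0x42);
  assert(modImmOp(Enc) == 0);
  assert(modImmCmode(Enc) == 0xe);
  assert(modImmVal(Enc) == 0x42);
  return 0;
}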
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index b1d90df34177..7d21256a14f9 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -137,25 +137,25 @@ static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To, /// Various utilities for checking the target specific flags. /// A unary data processing instruction doesn't have an Rn operand. -static inline bool isUnaryDP(unsigned TSFlags) { +static inline bool isUnaryDP(uint64_t TSFlags) { return (TSFlags & ARMII::UnaryDP); } /// This four-bit field describes the addressing mode used. /// See also ARMBaseInstrInfo.h. -static inline unsigned getAddrMode(unsigned TSFlags) { +static inline unsigned getAddrMode(uint64_t TSFlags) { return (TSFlags & ARMII::AddrModeMask); } /// {IndexModePre, IndexModePost} /// Only valid for load and store ops. /// See also ARMBaseInstrInfo.h. -static inline unsigned getIndexMode(unsigned TSFlags) { +static inline unsigned getIndexMode(uint64_t TSFlags) { return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; } /// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList. -static inline bool isPrePostLdSt(unsigned TSFlags) { +static inline bool isPrePostLdSt(uint64_t TSFlags) { return (TSFlags & ARMII::IndexModeMask) != 0; } diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 4b2e308248c5..4b7a0bf6fdb9 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -395,7 +395,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRm(insn)))); } else { - assert(OpInfo[OpIdx].RegClass == 0 && + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn) @@ -531,7 +531,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass == 0 && + (OpInfo[1].RegClass < 0 && !OpInfo[1].isPredicate() && !OpInfo[1].isOptionalDef()) && "Invalid arguments"); @@ -598,7 +598,7 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, assert(OpIdx < NumOps && "More operands expected"); - if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() && + if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { MI.addOperand(MCOperand::CreateImm(Imm5 ? 
getT1Imm5(insn) : 0)); @@ -632,7 +632,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass == 0 && + (OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && !OpInfo[2].isOptionalDef()) && "Invalid arguments"); @@ -658,7 +658,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass == 0 && + (OpInfo[1].RegClass < 0 && !OpInfo[1].isPredicate() && !OpInfo[1].isOptionalDef()) && "Invalid arguments"); @@ -685,7 +685,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass == 0 && + (OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && !OpInfo[2].isOptionalDef()) && "Invalid arguments"); @@ -761,7 +761,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, // Predicate operands are handled elsewhere. if (NumOps == 2 && OpInfo[0].isPredicate() && OpInfo[1].isPredicate() && - OpInfo[0].RegClass == 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) { + OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) { return true; } @@ -808,7 +808,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, } assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass==0 || OpInfo[1].RegClass==ARM::tGPRRegClassID) + (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID) && "Expect >=2 operands"); // Add the destination operand. @@ -913,7 +913,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; - assert(NumOps == 3 && OpInfo[0].RegClass == 0 && + assert(NumOps == 3 && OpInfo[0].RegClass < 0 && OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID && "Exactly 3 operands expected"); @@ -939,7 +939,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; - assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected"); + assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected"); unsigned Imm11 = getT1Imm11(insn); @@ -1239,7 +1239,7 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID && OpInfo[2].RegClass == ARM::GPRRegClassID - && OpInfo[3].RegClass == 0 + && OpInfo[3].RegClass < 0 && "Expect >= 4 operands and first 3 as reg operands"); // Add the <Rt> <Rt2> operands. @@ -1322,8 +1322,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps == 4 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID - && OpInfo[2].RegClass == 0 - && OpInfo[3].RegClass == 0 + && OpInfo[2].RegClass < 0 + && OpInfo[3].RegClass < 0 && "Exactly 4 operands expected and first two as reg operands"); // Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -1375,7 +1375,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumOps == OpIdx) return true; - if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { if (Thumb2ShiftOpcode(Opcode)) @@ -1440,7 +1440,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, } // The modified immediate operand should come next. - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 && + assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1555,7 +1555,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, ++OpIdx; } - assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1772,7 +1772,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { - assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); int Offset = 0; @@ -1792,7 +1792,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, } ++OpIdx; - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 && + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs. MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4))); @@ -1818,7 +1818,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == 0 && + OpInfo[1].RegClass < 0 && "Expect >= 2 operands, first as reg, and second as imm operand"); // Build the register operand, followed by the (+/-)imm12 immediate. @@ -1930,7 +1930,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, ++OpIdx; } - assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1981,7 +1981,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, decodeRm(insn)))); ++OpIdx; - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Add the rotation amount immediate. MI.addOperand(MCOperand::CreateImm(decodeRotate(insn))); diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 0a4400cc7ddd..bbdd3c7f7c3e 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -105,8 +105,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { unsigned MOReg = MO.getReg(); Defs[MOReg] = MI; - // Catch subregs as well. - for (const unsigned *R = TRI->getSubRegisters(MOReg); *R; ++R) + // Catch aliases as well. 
+ for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R) Defs[*R] = MI; } } diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index a7258985e104..f67717cdd56f 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -407,7 +407,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, "expected a virtual register"); // Extracting from a Q or QQ register. MachineInstr *DefMI = MRI->getVRegDef(VirtReg); - if (!DefMI || !DefMI->isExtractSubreg()) + if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg()) return false; VirtReg = DefMI->getOperand(1).getReg(); if (LastSrcReg && LastSrcReg != VirtReg) @@ -418,7 +418,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, RC != ARM::QQPRRegisterClass && RC != ARM::QQQQPRRegisterClass) return false; - unsigned SubIdx = DefMI->getOperand(2).getImm(); + unsigned SubIdx = DefMI->getOperand(1).getSubReg(); if (LastSubIdx) { if (LastSubIdx != SubIdx-Stride) return false; @@ -434,22 +434,21 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, // FIXME: Updating the uses of EXTRACT_SUBREG from REG_SEQUENCE is // currently required for correctness. e.g. - // %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6 + // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6 // %reg1042<def> = EXTRACT_SUBREG %reg1041, 6 // %reg1043<def> = EXTRACT_SUBREG %reg1041, 5 // VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>, - // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5 + // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5 // respectively. // We need to change how we model uses of REG_SEQUENCE. for (unsigned R = 0; R < NumRegs; ++R) { MachineOperand &MO = MI->getOperand(FirstOpnd + R); unsigned OldReg = MO.getReg(); MachineInstr *DefMI = MRI->getVRegDef(OldReg); - assert(DefMI->isExtractSubreg()); + assert(DefMI->isCopy()); MO.setReg(LastSrcReg); MO.setSubReg(SubIds[R]); - if (R != 0) - MO.setIsKill(false); + MO.setIsKill(false); // Delete the EXTRACT_SUBREG if its result is now dead. if (MRI->use_empty(OldReg)) DefMI->eraseFromParent(); @@ -467,43 +466,9 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { unsigned FirstOpnd, NumRegs, Offset, Stride; if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; - if (llvm::ModelWithRegSequence() && - FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) + if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; - - MachineBasicBlock::iterator NextI = llvm::next(MBBI); - for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - - // For now, just assign a fixed set of adjacent registers. - // This leaves plenty of room for future improvements. - static const unsigned NEONDRegs[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7 - }; - MO.setReg(NEONDRegs[Offset + R * Stride]); - - if (MO.isUse()) { - // Insert a copy from VirtReg.
- TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg, - ARM::DPRRegisterClass, ARM::DPRRegisterClass, - DebugLoc()); - if (MO.isKill()) { - MachineInstr *CopyMI = prior(MBBI); - CopyMI->findRegisterUseOperand(VirtReg)->setIsKill(); - } - MO.setIsKill(); - } else if (MO.isDef() && !MO.isDead()) { - // Add a copy to VirtReg. - TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(), - ARM::DPRRegisterClass, ARM::DPRRegisterClass, - DebugLoc()); - } - } + llvm_unreachable("expected a REG_SEQUENCE"); } return Modified; diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index fae84d4ee022..af630ac797c5 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -33,64 +33,24 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC == ARM::GPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); - return true; - } - } else if (DestRC == ARM::tGPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); - return true; - } - } - - return false; -} - -bool Thumb1InstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { - if (Ops.size() != 1) return false; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: break; - case ARM::tMOVr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVgpr2gpr: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - !isARMLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - return false; - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg) && - !isARMLowRegister(DstReg)) - // tRestore cannot target a high register operand. 
- return false; - } - return true; - } - } - - return false; +void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool tDest = ARM::tGPRRegClass.contains(DestReg); + bool tSrc = ARM::tGPRRegClass.contains(SrcReg); + unsigned Opc = ARM::tMOVgpr2gpr; + if (tDest && tSrc) + Opc = ARM::tMOVr; + else if (tSrc) + Opc = ARM::tMOVtgpr2gpr; + else if (tDest) + Opc = ARM::tMOVgpr2tgpr; + + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && + "Thumb1 can only copy GPR registers"); } void Thumb1InstrInfo:: @@ -175,10 +135,10 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, isKill = false; } - if (isKill) { + if (isKill) MBB.addLiveIn(Reg); - MIB.addReg(Reg, RegState::Kill); - } + + MIB.addReg(Reg, getKillRegState(isKill)); } return true; } @@ -221,46 +181,3 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } - -MachineInstr *Thumb1InstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = NULL; - switch (Opc) { - default: break; - case ARM::tMOVr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVgpr2gpr: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - !isARMLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - break; - NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0)); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg) && - !isARMLowRegister(DstReg)) - // tRestore cannot target a high register operand. 
- break; - bool isDead = MI->getOperand(0).isDead(); - NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) - .addReg(DstReg, - RegState::Define | getDeadRegState(isDead)) - .addFrameIndex(FI).addImm(0)); - } - break; - } - } - - return NewMI; -} diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index c937296bbee1..555135a8b76c 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -46,12 +46,10 @@ public: const std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const; - bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -64,20 +62,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } }; } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 2f635fe50ad5..39b70b43b23f 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -68,21 +68,6 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); } -const TargetRegisterClass* -Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const { - if (isARMLowRegister(Reg)) - return ARM::tGPRRegisterClass; - switch (Reg) { - default: - break; - case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: - case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: - return ARM::GPRRegisterClass; - } - - return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); -} - bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); @@ -410,6 +395,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // before that instead and adjust the UseMI. bool done = false; for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { + if (II->isDebugValue()) + continue; // If this instruction affects R12, adjust our restore point. for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { const MachineOperand &MO = II->getOperand(i); diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 4eca3673391f..9a0308afa20c 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -38,9 +38,6 @@ public: unsigned PredReg = 0) const; /// Code Generation virtual methods... 
- const TargetRegisterClass * - getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; - bool hasReservedCallFrame(MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, @@ -51,7 +48,8 @@ public: // could not be handled directly in MI. int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int Offset, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const; + unsigned MOVOpc, unsigned ADDriOpc, + unsigned SUBriOpc) const; bool saveScavengerRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.cpp b/lib/Target/ARM/Thumb2HazardRecognizer.cpp new file mode 100644 index 000000000000..172908da228a --- /dev/null +++ b/lib/Target/ARM/Thumb2HazardRecognizer.cpp @@ -0,0 +1,53 @@ +//===-- Thumb2HazardRecognizer.cpp - Thumb2 postra hazard recognizer ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "Thumb2HazardRecognizer.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/ScheduleDAG.h" +using namespace llvm; + +ScheduleHazardRecognizer::HazardType +Thumb2HazardRecognizer::getHazardType(SUnit *SU) { + if (ITBlockSize) { + MachineInstr *MI = SU->getInstr(); + if (!MI->isDebugValue() && MI != ITBlockMIs[ITBlockSize-1]) + return Hazard; + } + + return PostRAHazardRecognizer::getHazardType(SU); +} + +void Thumb2HazardRecognizer::Reset() { + ITBlockSize = 0; + PostRAHazardRecognizer::Reset(); +} + +void Thumb2HazardRecognizer::EmitInstruction(SUnit *SU) { + MachineInstr *MI = SU->getInstr(); + unsigned Opcode = MI->getOpcode(); + if (ITBlockSize) { + --ITBlockSize; + } else if (Opcode == ARM::t2IT) { + unsigned Mask = MI->getOperand(1).getImm(); + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + ITBlockSize = 4 - NumTZ; + MachineBasicBlock::iterator I = MI; + for (unsigned i = 0; i < ITBlockSize; ++i) { + // Advance to the next instruction, skipping any dbg_value instructions. + do { + ++I; + } while (I->isDebugValue()); + ITBlockMIs[ITBlockSize-1-i] = &*I; + } + } + + PostRAHazardRecognizer::EmitInstruction(SU); +} diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h new file mode 100644 index 000000000000..472665862e41 --- /dev/null +++ b/lib/Target/ARM/Thumb2HazardRecognizer.h @@ -0,0 +1,40 @@ +//===-- Thumb2HazardRecognizer.h - Thumb2 Hazard Recognizers ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines hazard recognizers for scheduling Thumb2 functions on +// ARM processors. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB2HAZARDRECOGNIZER_H +#define THUMB2HAZARDRECOGNIZER_H + +#include "llvm/CodeGen/PostRAHazardRecognizer.h" + +namespace llvm { + +class MachineInstr; + +class Thumb2HazardRecognizer : public PostRAHazardRecognizer { + unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. 
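+  // ITBlockMIs - MIs of the current IT block, in reverse program order:
+  // ITBlockMIs[ITBlockSize-1] is always the next MI expected to issue
+  // (filled in by EmitInstruction in the .cpp file above).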
+  MachineInstr *ITBlockMIs[4];
+
+public:
+  Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
+    PostRAHazardRecognizer(ItinData) {}
+
+  virtual HazardType getHazardType(SUnit *SU);
+  virtual void Reset();
+  virtual void EmitInstruction(SUnit *SU);
+};
+
+
+} // end namespace llvm
+
+#endif // THUMB2HAZARDRECOGNIZER_H
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index f36d4ef7567e..cd15bbed9f23 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -14,17 +14,23 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
-STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
 namespace {
-  struct Thumb2ITBlockPass : public MachineFunctionPass {
+  class Thumb2ITBlockPass : public MachineFunctionPass {
+    bool PreRegAlloc;
+
+  public:
     static char ID;
     Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
     const Thumb2InstrInfo *TII;
+    const TargetRegisterInfo *TRI;
     ARMFunctionInfo *AFI;
     virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -34,61 +40,167 @@ namespace {
     }
   private:
-    bool InsertITBlocks(MachineBasicBlock &MBB);
+    bool MoveCopyOutOfITBlock(MachineInstr *MI,
+                              ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+                              SmallSet<unsigned, 4> &Defs,
+                              SmallSet<unsigned, 4> &Uses);
+    bool InsertITInstructions(MachineBasicBlock &MBB);
   };
   char Thumb2ITBlockPass::ID = 0;
}
-static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
-  unsigned Opc = MI->getOpcode();
-  if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
-    return ARMCC::AL;
-  return llvm::getInstrPredicate(MI, PredReg);
+/// TrackDefUses - Track which registers are defined and used by
+/// instructions in the IT block. This also tracks "dependencies", i.e. uses
+/// in the IT block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+                         SmallSet<unsigned, 4> &Defs,
+                         SmallSet<unsigned, 4> &Uses,
+                         const TargetRegisterInfo *TRI) {
+  SmallVector<unsigned, 4> LocalDefs;
+  SmallVector<unsigned, 4> LocalUses;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+      continue;
+    if (MO.isUse())
+      LocalUses.push_back(Reg);
+    else
+      LocalDefs.push_back(Reg);
+  }
+
+  for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+    unsigned Reg = LocalUses[i];
+    Uses.insert(Reg);
+    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+         *Subreg; ++Subreg)
+      Uses.insert(*Subreg);
+  }
+
+  for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+    unsigned Reg = LocalDefs[i];
+    Defs.insert(Reg);
+    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+         *Subreg; ++Subreg)
+      Defs.insert(*Subreg);
+    if (Reg == ARM::CPSR)
+      continue;
+  }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+                                        ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+                                        SmallSet<unsigned, 4> &Defs,
+                                        SmallSet<unsigned, 4> &Uses) {
+  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+  if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+    assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
+           "Sub-register indices still around?");
+    // LLVM models selects as two-address instructions. That means a copy
+    // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+    // between selects we would end up creating multiple IT blocks.
+
+    // First check if it's safe to move it.
+    if (Uses.count(DstReg) || Defs.count(SrcReg))
+      return false;
+
+    // Then peek at the next instruction to see if it's predicated on CC or OCC.
+    // If not, then there is nothing to be gained by moving the copy.
+    MachineBasicBlock::iterator I = MI; ++I;
+    MachineBasicBlock::iterator E = MI->getParent()->end();
+    while (I != E && I->isDebugValue())
+      ++I;
+    if (I != E) {
+      unsigned NPredReg = 0;
+      ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+      if (NCC == CC || NCC == OCC)
+        return true;
+    }
+  }
+  return false;
 }
-bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
   bool Modified = false;
+  SmallSet<unsigned, 4> Defs;
+  SmallSet<unsigned, 4> Uses;
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
   while (MBBI != E) {
     MachineInstr *MI = &*MBBI;
     DebugLoc dl = MI->getDebugLoc();
     unsigned PredReg = 0;
-    ARMCC::CondCodes CC = getPredicate(MI, PredReg);
-
+    ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
     if (CC == ARMCC::AL) {
       ++MBBI;
       continue;
     }
+    Defs.clear();
+    Uses.clear();
+    TrackDefUses(MI, Defs, Uses, TRI);
+
     // Insert an IT instruction.
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
       .addImm(CC);
+
+    // Add implicit use of ITSTATE to IT block instructions.
+    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+                                             true/*isImp*/, false/*isKill*/));
+
+    MachineInstr *LastITMI = MI;
+    MachineBasicBlock::iterator InsertPos = MIB;
     ++MBBI;
-    // Finalize IT mask.
+    // Form IT block.
     ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
     unsigned Mask = 0, Pos = 3;
     // Branches, including tricky ones like LDM_RET, need to end an IT
     // block so check the instruction we just put in the block.
-    while (MBBI != E && Pos &&
-           (!MI->getDesc().isBranch() && !MI->getDesc().isReturn())) {
+    for (; MBBI != E && Pos &&
+           (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+      if (MBBI->isDebugValue())
+        continue;
+
       MachineInstr *NMI = &*MBBI;
       MI = NMI;
-      DebugLoc ndl = NMI->getDebugLoc();
+
       unsigned NPredReg = 0;
-      ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
-      if (NCC == CC || NCC == OCC)
+      ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+      if (NCC == CC || NCC == OCC) {
         Mask |= (NCC & 1) << Pos;
-      else
+        // Add implicit use of ITSTATE.
+        NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+                                                  true/*isImp*/, false/*isKill*/));
+        LastITMI = NMI;
+      } else {
+        if (NCC == ARMCC::AL &&
+            MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+          --MBBI;
+          MBB.remove(NMI);
+          MBB.insert(InsertPos, NMI);
+          ++NumMovedInsts;
+          continue;
+        }
         break;
+      }
+      TrackDefUses(NMI, Defs, Uses, TRI);
       --Pos;
-      ++MBBI;
     }
+
+    // Finalize IT mask.
     Mask |= (1 << Pos);
     // Tag along (firstcond[0] << 4) with the mask.
     Mask |= (CC & 1) << 4;
     MIB.addImm(Mask);
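+
+    // For example, CC == NE (encoded as 1) followed by one more "then" (NE)
+    // instruction and one "else" (EQ) instruction gives bit 3 = 1, bit 2 = 0,
+    // a terminator bit at Pos == 1, and firstcond[0] == 1 tagged at bit 4:
+    // Mask == 0b11010 (0x1A). The hazard recognizer above recovers the block
+    // size from the same immediate as 4 - CountTrailingZeros_32(0x1A) == 3.
+
+    // Last instruction in IT block kills ITSTATE.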
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill(); + Modified = true; ++NumITs; } @@ -100,17 +212,21 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); AFI = Fn.getInfo<ARMFunctionInfo>(); TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); + TRI = TM.getRegisterInfo(); if (!AFI->isThumbFunction()) return false; bool Modified = false; - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) { MachineBasicBlock &MBB = *MFI; - Modified |= InsertITBlocks(MBB); + ++MFI; + Modified |= InsertITInstructions(MBB); } + if (Modified) + AFI->setHasITBlocks(true); + return Modified; } diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 531d5e9f147e..ee517279c9d7 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -17,15 +17,27 @@ #include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" +#include "Thumb2HazardRecognizer.h" +#include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/ADT/SmallVector.h" -#include "Thumb2InstrInfo.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; +static cl::opt<unsigned> +IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden, + cl::desc("Thumb2 if-conversion limit (default 3)"), + cl::init(3)); + +static cl::opt<unsigned> +IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden, + cl::desc("Thumb2 diamond if-conversion limit (default 3)"), + cl::init(3)); + Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI) : ARMBaseInstrInfo(STI), RI(*this, STI) { } @@ -35,33 +47,99 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -bool -Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC == ARM::GPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); - return true; - } - } else if (DestRC == ARM::tGPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); - return true; +void +Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const { + MachineBasicBlock *MBB = Tail->getParent(); + ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); + if (!AFI->hasITBlocks()) { + TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + return; + } + + // If the first instruction of Tail is predicated, we may have to update + // the IT instruction. + unsigned PredReg = 0; + ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg); + MachineBasicBlock::iterator MBBI = Tail; + if (CC != ARMCC::AL) + // Expecting at least the t2IT instruction before it. + --MBBI; + + // Actually replace the tail. 
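+  // (TargetInstrInfoImpl::ReplaceTailWithBranchTo removes everything from
+  // Tail to the end of the block and appends an unconditional branch to
+  // NewDest; only the IT mask fix-up below is Thumb2-specific.)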
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest); + + // Fix up IT. + if (CC != ARMCC::AL) { + MachineBasicBlock::iterator E = MBB->begin(); + unsigned Count = 4; // At most 4 instructions in an IT block. + while (Count && MBBI != E) { + if (MBBI->isDebugValue()) { + --MBBI; + continue; + } + if (MBBI->getOpcode() == ARM::t2IT) { + unsigned Mask = MBBI->getOperand(1).getImm(); + if (Count == 4) + MBBI->eraseFromParent(); + else { + unsigned MaskOn = 1 << Count; + unsigned MaskOff = ~(MaskOn - 1); + MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn); + } + return; + } + --MBBI; + --Count; } + + // Ctrl flow can reach here if branch folding is run before IT block + // formation pass. } +} + +bool +Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const { + unsigned PredReg = 0; + return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; +} +bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs) const { + return NumInstrs && NumInstrs <= IfCvtLimit; +} + +bool Thumb2InstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT, + MachineBasicBlock &FMBB, unsigned NumF) const { + // FIXME: Catch optimization such as: + // r0 = movne + // r0 = moveq + return NumT && NumF && + NumT <= (IfCvtDiamondLimit) && NumF <= (IfCvtDiamondLimit); +} + +void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { // Handle SPR, DPR, and QPR copies. - return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL); + if (!ARM::GPRRegClass.contains(DestReg, SrcReg)) + return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc); + + bool tDest = ARM::tGPRRegClass.contains(DestReg); + bool tSrc = ARM::tGPRRegClass.contains(SrcReg); + unsigned Opc = ARM::tMOVgpr2gpr; + if (tDest && tSrc) + Opc = ARM::tMOVr; + else if (tSrc) + Opc = ARM::tMOVtgpr2gpr; + else if (tDest) + Opc = ARM::tMOVgpr2tgpr; + + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } void Thumb2InstrInfo:: @@ -69,7 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || + RC == ARM::tcGPRRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -94,7 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) { + if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass || + RC == ARM::tcGPRRegisterClass) { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); @@ -113,6 +193,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } +ScheduleHazardRecognizer *Thumb2InstrInfo:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { + return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II); +} + void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, @@ -131,14 +216,14 @@ 
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
     // Use a movw to materialize the 16-bit constant.
     BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
       .addImm(NumBytes)
-      .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+      .addImm((unsigned)Pred).addReg(PredReg);
     Fits = true;
   } else if ((NumBytes & 0xffff) == 0) {
     // Use a movt to materialize the 32-bit constant.
     BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
       .addReg(DestReg)
       .addImm(NumBytes >> 16)
-      .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+      .addImm((unsigned)Pred).addReg(PredReg);
     Fits = true;
   }
@@ -502,3 +587,54 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   Offset = (isSub) ? -Offset : Offset;
   return Offset == 0;
 }
+
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-address instruction inserted by the two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+                                       MachineInstr *UseMI,
+                                       const TargetRegisterInfo &TRI) const {
+  if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr ||
+      SrcMI->getOperand(1).isKill())
+    return;
+
+  unsigned PredReg = 0;
+  ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+    return;
+
+  // Schedule the copy so it doesn't come between previous instructions
+  // and UseMI, which together can form an IT block.
+  unsigned SrcReg = SrcMI->getOperand(1).getReg();
+  ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+  MachineBasicBlock *MBB = UseMI->getParent();
+  MachineBasicBlock::iterator MBBI = SrcMI;
+  unsigned NumInsts = 0;
+  while (--MBBI != MBB->begin()) {
+    if (MBBI->isDebugValue())
+      continue;
+
+    MachineInstr *NMI = &*MBBI;
+    ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+    if (!(NCC == CC || NCC == OCC) ||
+        NMI->modifiesRegister(SrcReg, &TRI) ||
+        NMI->definesRegister(ARM::CPSR))
+      break;
+    if (++NumInsts == 4)
+      // Too many in a row!
+      return;
+  }
+
+  if (NumInsts) {
+    MBB->remove(SrcMI);
+    MBB->insert(++MBBI, SrcMI);
+  }
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+    return ARMCC::AL;
+  return llvm::getInstrPredicate(MI, PredReg);
+}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 29487700d19b..3a9f8b194d3c 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -20,7 +20,8 @@
 #include "Thumb2RegisterInfo.h"
 namespace llvm {
-  class ARMSubtarget;
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
 class Thumb2InstrInfo : public ARMBaseInstrInfo {
   Thumb2RegisterInfo RI;
@@ -31,12 +32,21 @@ public:
   // if there is not such an opcode.
  unsigned getUnindexedOpcode(unsigned Opc) const;
-  bool copyRegToReg(MachineBasicBlock &MBB,
-                    MachineBasicBlock::iterator I,
-                    unsigned DestReg, unsigned SrcReg,
-                    const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC,
-                    DebugLoc DL) const;
+  void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                               MachineBasicBlock *NewDest) const;
+
+  bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI) const;
+
+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const;
+
+  bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
+                           MachineBasicBlock &FMBB, unsigned NumFInstrs) const;
+
+  void copyPhysReg(MachineBasicBlock &MBB,
+                   MachineBasicBlock::iterator I, DebugLoc DL,
+                   unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const;
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
@@ -50,12 +60,27 @@ public:
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const;
+  /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+  /// two-address instruction inserted by the two-address pass.
+  void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+                             const TargetRegisterInfo &TRI) const;
+
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
   const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+
+  ScheduleHazardRecognizer *
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
 };
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
 }
 #endif // THUMB2INSTRUCTIONINFO_H
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 8fe2e42a7cdd..ba392f36d946 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -451,11 +451,18 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
     return false;
-  const TargetInstrDesc &TID = MI->getDesc();
   unsigned Reg0 = MI->getOperand(0).getReg();
   unsigned Reg1 = MI->getOperand(1).getReg();
-  if (Reg0 != Reg1)
-    return false;
+  if (Reg0 != Reg1) {
+    // Try to commute the operands to make it a 2-address instruction.
+    unsigned CommOpIdx1, CommOpIdx2;
+    if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+        CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+      return false;
+    MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+    if (!CommutedMI)
+      return false;
+  }
   if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
     return false;
   if (Entry.Imm2Limit) {
@@ -484,6 +491,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   bool HasCC = false;
   bool CCDead = false;
+  const TargetInstrDesc &TID = MI->getDesc();
   if (TID.hasOptionalDef()) {
     unsigned NumOps = TID.getNumOperands();
     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
@@ -689,7 +697,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       goto ProcessNext;
     }
-    // Try to transform ro a 16-bit non-two-address instruction.
+    // Try to transform to a 16-bit non-two-address instruction.
if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 1d85f12c3ef9..ea78bf374200 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -224,6 +224,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -251,7 +252,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -425,7 +426,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, } } else { //more args // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true, false); + int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -444,7 +445,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_int[i])) args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass); SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); - int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); + int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true); if (i == 0) FuncInfo->setVarArgsBase(FI); SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, @@ -453,7 +454,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); - FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); + FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true); SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, false, false, 0)); @@ -470,6 +471,7 @@ SDValue AlphaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26, @@ -483,7 +485,7 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, break; //return SDValue(); // ret void is legal case 1: { - EVT ArgVT = Outs[0].Val.getValueType(); + EVT ArgVT = Outs[0].VT; unsigned ArgReg; if (ArgVT.isInteger()) ArgReg = Alpha::R0; @@ -492,13 +494,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, ArgReg = Alpha::F0; } Copy = DAG.getCopyToReg(Copy, dl, ArgReg, - Outs[0].Val, Copy.getValue(1)); + OutVals[0], Copy.getValue(1)); if (DAG.getMachineFunction().getRegInfo().liveout_empty()) DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg); break; } case 2: { - EVT ArgVT = Outs[0].Val.getValueType(); + EVT ArgVT = Outs[0].VT; unsigned ArgReg1, ArgReg2; if (ArgVT.isInteger()) { ArgReg1 = Alpha::R0; @@ -509,13 +511,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, ArgReg2 = Alpha::F1; } Copy = DAG.getCopyToReg(Copy, 
dl, ArgReg1, - Outs[0].Val, Copy.getValue(1)); + OutVals[0], Copy.getValue(1)); if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1) == DAG.getMachineFunction().getRegInfo().liveout_end()) DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1); Copy = DAG.getCopyToReg(Copy, dl, ArgReg2, - Outs[1].Val, Copy.getValue(1)); + OutVals[1], Copy.getValue(1)); if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2) == DAG.getMachineFunction().getRegInfo().liveout_end()) @@ -539,7 +541,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, false, false, 0); SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); - SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), + SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Base.getValue(1), Tmp, NULL, 0, MVT::i32, false, false, 0); DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); if (N->getValueType(0).isFloatingPoint()) @@ -643,10 +645,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, case ISD::GlobalAddress: { GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset()); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, + GSDN->getOffset()); // FIXME there isn't really any debug info here - // if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) { + // if (!GV->hasWeakLinkage() && !GV->isDeclaration() + // && !GV->hasLinkOnceLinkage()) { if (GV->hasLocalLinkage()) { SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA, DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); @@ -702,7 +706,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SDValue Result; if (Op.getValueType() == MVT::i32) - Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, + Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Chain, DataPtr, NULL, 0, MVT::i32, false, false, 0); else Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0, @@ -722,7 +726,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, false, false, 0); SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, DAG.getConstant(8, MVT::i64)); - Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, + Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Result, NP, NULL,0, MVT::i32, false, false, 0); SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, DAG.getConstant(8, MVT::i64)); @@ -863,7 +867,10 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - sinkMBB->transferSuccessors(thisMBB); + sinkMBB->splice(sinkMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(MI)), + thisMBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB); F->insert(It, llscMBB); F->insert(It, sinkMBB); @@ -912,7 +919,7 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, thisMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(sinkMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. 
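  // At this point sinkMBB has taken over everything after the pseudo (via the
  // splice above) and transferSuccessorsAndUpdatePHIs has retargeted the
  // successors' PHI operands, so it is safe to resume emission there.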
return sinkMBB; } diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index 7ee823a86a4d..46e0c7dc9f80 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -121,6 +121,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -129,6 +130,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td index d98455628887..6f4ebf279643 100644 --- a/lib/Target/Alpha/AlphaInstrFormats.td +++ b/lib/Target/Alpha/AlphaInstrFormats.td @@ -182,7 +182,7 @@ class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, Inst bits<5> Rb; bits<7> Function = fun; -// let isTwoAddress = 1; +// let Constraints = "$RFALSE = $RDEST"; let Inst{25-21} = Ra; let Inst{20-16} = Rb; let Inst{15-13} = 0; @@ -223,7 +223,7 @@ class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, Ins bits<8> LIT; bits<7> Function = fun; -// let isTwoAddress = 1; +// let Constraints = "$RFALSE = $RDEST"; let Inst{25-21} = Ra; let Inst{20-13} = LIT; let Inst{12} = 1; diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 3aba3639a5f3..ad625a269417 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -110,9 +110,8 @@ static bool isAlphaIntCondCode(unsigned Opcode) { unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && "Alpha branch conditions have two components!"); @@ -120,58 +119,47 @@ unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB, // One-way branch. if (FBB == 0) { if (Cond.empty()) // Unconditional branch - BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(TBB); + BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB); else // Conditional branch if (isAlphaIntCondCode(Cond[0].getImm())) - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); else - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); return 1; } // Two-way Conditional Branch. 
if (isAlphaIntCondCode(Cond[0].getImm())) - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); else - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(FBB); + BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB); return 2; } -bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n"; - if (DestRC != SrcRC) { - // Not yet supported! - return false; - } - - if (DestRC == Alpha::GPRCRegisterClass) { +void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg) .addReg(SrcReg) - .addReg(SrcReg); - } else if (DestRC == Alpha::F4RCRegisterClass) { + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg) .addReg(SrcReg) - .addReg(SrcReg); - } else if (DestRC == Alpha::F8RCRegisterClass) { + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg) .addReg(SrcReg) - .addReg(SrcReg); + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - // Attempt to copy register that is not GPR or FPR - return false; + llvm_unreachable("Attempt to copy register that is not GPR or FPR"); } - - return true; } void @@ -227,51 +215,6 @@ AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, llvm_unreachable("Unhandled register class"); } -MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { - if (Ops.size() != 1) return NULL; - - // Make sure this is a reg-reg copy. - unsigned Opc = MI->getOpcode(); - - MachineInstr *NewMI = NULL; - switch(Opc) { - default: - break; - case Alpha::BISr: - case Alpha::CPYSS: - case Alpha::CPYST: - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { - if (Ops[0] == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - Opc = (Opc == Alpha::BISr) ? Alpha::STQ : - ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addFrameIndex(FrameIndex) - .addReg(Alpha::F31); - } else { // load -> move - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - Opc = (Opc == Alpha::BISr) ? Alpha::LDQ : - ((Opc == Alpha::CPYSS) ? 
Alpha::LDS : Alpha::LDT); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(OutReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addFrameIndex(FrameIndex) - .addReg(Alpha::F31); - } - } - break; - } - return NewMI; -} - static unsigned AlphaRevCondCode(unsigned Opcode) { switch (Opcode) { case Alpha::BEQ: return Alpha::BNE; @@ -428,11 +371,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global base register!"); - Ok = Ok; // Silence warning when assertions are turned off. + BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalBaseReg).addReg(Alpha::R29); RegInfo.addLiveIn(Alpha::R29); AlphaFI->setGlobalBaseReg(GlobalBaseReg); @@ -456,11 +396,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global return address register!"); - Ok = Ok; // Silence warning when assertions are turned off. + BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalRetAddr).addReg(Alpha::R26); RegInfo.addLiveIn(Alpha::R26); AlphaFI->setGlobalRetAddr(GlobalRetAddr); diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index 7d7365b75bae..e20e8323b64e 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -42,14 +42,13 @@ public: int &FrameIndex) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -62,18 +61,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index a47a29b4255a..92de78a364ba 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -680,18 +680,32 @@ def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", 
} //conditional moves, floats -let OutOperandList = (outs F4RC:$RDEST), InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), - isTwoAddress = 1 in { -def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero -def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero -def FCMOVGTS : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if > zero -def FCMOVLES : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if <= zero -def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; // FCMOVE if < zero -def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero +let OutOperandList = (outs F4RC:$RDEST), + InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), + Constraints = "$RTRUE = $RDEST" in { +def FCMOVEQS : FPForm<0x17, 0x02A, + "fcmoveq $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if = zero +def FCMOVGES : FPForm<0x17, 0x02D, + "fcmovge $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if >= zero +def FCMOVGTS : FPForm<0x17, 0x02F, + "fcmovgt $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if > zero +def FCMOVLES : FPForm<0x17, 0x02E, + "fcmovle $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if <= zero +def FCMOVLTS : FPForm<0x17, 0x02C, + "fcmovlt $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; // FCMOVE if < zero +def FCMOVNES : FPForm<0x17, 0x02B, + "fcmovne $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if != zero } //conditional moves, doubles -let OutOperandList = (outs F8RC:$RDEST), InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), - isTwoAddress = 1 in { +let OutOperandList = (outs F8RC:$RDEST), + InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), + Constraints = "$RTRUE = $RDEST" in { def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>; def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>; def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>; diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index c083d8c30b16..dc9d935ec047 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -74,20 +74,6 @@ const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) return CalleeSavedRegs; } -const TargetRegisterClass* const* -AlphaRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, 0 - }; - return CalleeSavedRegClasses; -} - BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(Alpha::R15); diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index 720367aa87dc..f9fd87a63737 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -30,9 +30,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index b4da96cba59e..80ee1075aada 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -132,8 +132,8 @@ static void UpdateNodeOperand(SelectionDAG &DAG, SDValue Val) { SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end()); ops[Num] = Val; - SDValue New = DAG.UpdateNodeOperands(SDValue(N, 0), ops.data(), ops.size()); - DAG.ReplaceAllUsesWith(N, New.getNode()); + SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size()); + DAG.ReplaceAllUsesWith(N, New); } // After instruction selection, insert COPY_TO_REGCLASS nodes to help in diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index adf211881870..6e828e1b36b3 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -143,7 +143,7 @@ SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op, DebugLoc DL = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - Op = DAG.getTargetGlobalAddress(GV, MVT::i32); + Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); } @@ -205,8 +205,7 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, } else { assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); unsigned ObjSize = VA.getLocVT().getStoreSize(); - int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, false, false, 0)); @@ -220,6 +219,7 @@ SDValue BlackfinTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. @@ -245,7 +245,7 @@ BlackfinTargetLowering::LowerReturn(SDValue Chain, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Opi = Outs[i].Val; + SDValue Opi = OutVals[i]; // Expand to i32 if necessary switch (VA.getLocInfo()) { @@ -278,6 +278,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -301,7 +302,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. 
switch (VA.getLocInfo()) { @@ -357,7 +358,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index a7842482687f..6bebcc320ce9 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -63,6 +63,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -71,6 +72,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index 73924b750a1c..a74d42d59549 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -104,10 +104,8 @@ unsigned BlackfinInstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc operand - DebugLoc DL; - + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. 
assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -124,69 +122,73 @@ InsertBranch(MachineBasicBlock &MBB, llvm_unreachable("Implement conditional branches!"); } -static bool inClass(const TargetRegisterClass &Test, - unsigned Reg, - const TargetRegisterClass *RC) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return Test.contains(Reg); - else - return &Test==RC || Test.hasSubClass(RC); -} - -bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, - unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (inClass(BF::ALLRegClass, DestReg, DestRC) && - inClass(BF::ALLRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg); - return true; +void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (BF::ALLRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(BF::MOVE), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - if (inClass(BF::D16RegClass, DestReg, DestRC) && - inClass(BF::D16RegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg).addReg(SrcReg).addImm(0); - return true; + if (BF::D16RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); + return; } - if (inClass(BF::AnyCCRegClass, SrcReg, SrcRC) && - inClass(BF::DRegClass, DestReg, DestRC)) { - if (inClass(BF::NotCCRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg).addReg(SrcReg); + if (BF::DRegClass.contains(DestReg)) { + if (SrcReg == BF::NCC) { + BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0); - } else { - BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg).addReg(SrcReg); + return; + } + if (SrcReg == BF::CC) { + BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return true; } - if (inClass(BF::AnyCCRegClass, DestReg, DestRC) && - inClass(BF::DRegClass, SrcReg, SrcRC)) { - if (inClass(BF::NotCCRegClass, DestReg, DestRC)) - BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg).addReg(SrcReg); - else - BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg).addReg(SrcReg); - return true; + if (BF::DRegClass.contains(SrcReg)) { + if (DestReg == BF::NCC) { + BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0); + return; + } + if (DestReg == BF::CC) { + BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } } - if (inClass(BF::NotCCRegClass, DestReg, DestRC) && - inClass(BF::JustCCRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg).addReg(SrcReg); - return true; + + if (DestReg == BF::NCC && SrcReg == BF::CC) { + BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - if (inClass(BF::JustCCRegClass, DestReg, DestRC) && - inClass(BF::NotCCRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg).addReg(SrcReg); - return true; + if (DestReg == BF::CC && SrcReg == BF::NCC) { + BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg) + .addReg(SrcReg, 
getKillRegState(KillSrc)); + return; } - llvm_unreachable((std::string("Bad regclasses for reg-to-reg copy: ")+ - SrcRC->getName() + " -> " + DestRC->getName()).c_str()); - return false; + llvm_unreachable("Bad reg-to-reg copy"); +} + +static bool inClass(const TargetRegisterClass &Test, + unsigned Reg, + const TargetRegisterClass *RC) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return Test.contains(Reg); + else + return &Test==RC || Test.hasSubClass(RC); } void diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h index c1dcd58ef5ed..6c3591707269 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.h +++ b/lib/Target/Blackfin/BlackfinInstrInfo.h @@ -44,14 +44,13 @@ namespace llvm { InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index 5cf350a0cb4f..8034a7fd0fc2 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -488,7 +488,7 @@ def MOVE: F1<(outs ALL:$dst), (ins ALL:$src), "$dst = $src;", []>; -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc), "if $cc $dst = $src2;", [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>; @@ -645,7 +645,7 @@ def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2), // Table C-15. 
Bit Operations Instructions //===----------------------------------------------------------------------===// -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2), "bitclr($dst, $src2);", [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>; @@ -691,7 +691,7 @@ multiclass SHIFT32<SDNode opnode, string ops> { } let Defs = [AZ, AN, V, VS], - isTwoAddress = 1 in { + Constraints = "$src = $dst" in { defm SRA : SHIFT32<sra, ">>>">; defm SRL : SHIFT32<srl, ">>">; defm SLL : SHIFT32<shl, "<<">; @@ -748,7 +748,7 @@ def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2), "$dst = $src1 + $src2;", [(set D16:$dst, (add D16:$src1, D16:$src2))]>; -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2), "$dst += $src2;", [(set D:$dst, (add D:$src1, imm7:$src2))]>; @@ -775,7 +775,7 @@ def NEG: F1<(outs D:$dst), (ins D:$src), def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2), "$dst = $src1 + $src2;", []>; -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2), "$dst += $src2;", []>; @@ -802,7 +802,7 @@ def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2), } -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2), "$dst *= $src2;", [(set D:$dst, (mul D:$src1, D:$src2))]>; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 5153aceb8f73..06e95de1587f 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -48,17 +48,6 @@ BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -const TargetRegisterClass* const *BlackfinRegisterInfo:: -getCalleeSavedRegClasses(const MachineFunction *MF) const { - using namespace BF; - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &PRegClass, - &DRegClass, &DRegClass, &DRegClass, &DRegClass, - &PRegClass, &PRegClass, &PRegClass, - 0 }; - return CalleeSavedRegClasses; -} - BitVector BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { using namespace BF; @@ -86,25 +75,6 @@ BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -const TargetRegisterClass* -BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { - assert(isPhysicalRegister(reg) && "reg must be a physical register"); - - // Pick the smallest register class of the right type that contains - // this physreg. - const TargetRegisterClass* BestRC = 0; - for (regclass_iterator I = regclass_begin(), E = regclass_end(); - I != E; ++I) { - const TargetRegisterClass* RC = *I; - if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && - (!BestRC || RC->getNumRegs() < BestRC->getNumRegs())) - BestRC = RC; - } - - assert(BestRC && "Couldn't find the register class"); - return BestRC; -} - // hasFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. 
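Taken together, the Blackfin hunks above replace the class-driven, failure-returning copyRegToReg with copyPhysReg, which operates purely on physical registers, forwards the source's kill state, and treats an impossible copy as a bug rather than a recoverable condition. A condensed sketch of the new shape, again using hypothetical MyTarget names rather than any target in this patch:

```cpp
// The old hook consulted DestRC/SrcRC and returned false on failure; the new
// one classifies the physical registers itself and must always succeed.
void MyTargetInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I, DebugLoc DL,
                                    unsigned DestReg, unsigned SrcReg,
                                    bool KillSrc) const {
  if (MyTarget::GPRRegClass.contains(DestReg, SrcReg)) { // both in one class?
    BuildMI(MBB, I, DL, get(MyTarget::MOVE), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc)); // propagate the kill flag
    return;
  }
  llvm_unreachable("Impossible reg-to-reg copy");
}
```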
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index 03c54507ff6e..ead0b4a73c83 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -33,9 +33,6 @@ namespace llvm { /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; // getSubReg implemented by tablegen @@ -44,9 +41,6 @@ namespace llvm { return &BF::PRegClass; } - const TargetRegisterClass *getPhysicalRegisterRegClass(unsigned reg, - EVT VT) const; - bool hasFP(const MachineFunction &MF) const; // bool hasReservedCallFrame(MachineFunction &MF) const; diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 55b8aaa4179f..e8d8474b5be8 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -264,7 +264,7 @@ namespace { // static const AllocaInst *isDirectAlloca(const Value *V) { const AllocaInst *AI = dyn_cast<AllocaInst>(V); - if (!AI) return false; + if (!AI) return 0; if (AI->isArrayAllocation()) return 0; // FIXME: we can also inline fixed size array allocas! if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock()) @@ -2889,7 +2889,7 @@ void CWriter::visitCallInst(CallInst &I) { bool hasByVal = I.hasByValArgument(); bool isStructRet = I.hasStructRetAttr(); if (isStructRet) { - writeOperandDeref(I.getOperand(1)); + writeOperandDeref(I.getArgOperand(0)); Out << " = "; } @@ -2944,8 +2944,8 @@ void CWriter::visitCallInst(CallInst &I) { } unsigned NumDeclaredParams = FTy->getNumParams(); - - CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end(); + CallSite CS(&I); + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); unsigned ArgNo = 0; if (isStructRet) { // Skip struct return argument. ++AI; @@ -2999,7 +2999,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << "0; "; Out << "va_start(*(va_list*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; // Output the last argument to the enclosing function. 
if (I.getParent()->getParent()->arg_empty()) @@ -3009,9 +3009,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << ')'; return true; case Intrinsic::vaend: - if (!isa<ConstantPointerNull>(I.getOperand(1))) { + if (!isa<ConstantPointerNull>(I.getArgOperand(0))) { Out << "0; va_end(*(va_list*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; } else { Out << "va_end(*(va_list*)0)"; @@ -3020,47 +3020,47 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, case Intrinsic::vacopy: Out << "0; "; Out << "va_copy(*(va_list*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", *(va_list*)"; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ')'; return true; case Intrinsic::returnaddress: Out << "__builtin_return_address("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; return true; case Intrinsic::frameaddress: Out << "__builtin_frame_address("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; return true; case Intrinsic::powi: Out << "__builtin_powi("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ')'; return true; case Intrinsic::setjmp: Out << "setjmp(*(jmp_buf*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; return true; case Intrinsic::longjmp: Out << "longjmp(*(jmp_buf*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ')'; return true; case Intrinsic::prefetch: Out << "LLVM_PREFETCH((const void *)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ", "; - writeOperand(I.getOperand(3)); + writeOperand(I.getArgOperand(2)); Out << ")"; return true; case Intrinsic::stacksave: @@ -3077,7 +3077,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, printType(Out, I.getType()); Out << ')'; // Multiple GCC builtins multiplex onto this intrinsic. 
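The mechanical change running through visitBuiltinCall is an indexing shift: CallInst::getOperand counted the callee as operand 0, so call arguments started at 1, while the new CallInst::getArgOperand numbers arguments from 0 and hides where the callee operand is stored. A tiny sketch of the correspondence (the helper is illustrative):

```cpp
#include "llvm/Instructions.h" // header location in this revision of LLVM
using namespace llvm;

// The same argument fetched under both conventions.
static Value *thirdArg(CallInst &CI) {
  // Old: operand 0 is the callee, so arguments are 1-based:
  //   return CI.getOperand(3);
  // New: arguments are numbered from 0:
  return CI.getArgOperand(2);
}
```

The SSE compare switch that follows applies the same shift to the immediate that selects the builtin: operand 3 becomes argument 2.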
- switch (cast<ConstantInt>(I.getOperand(3))->getZExtValue()) { + switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) { default: llvm_unreachable("Invalid llvm.x86.sse.cmp!"); case 0: Out << "__builtin_ia32_cmpeq"; break; case 1: Out << "__builtin_ia32_cmplt"; break; @@ -3098,9 +3098,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << 'd'; Out << "("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ")"; return true; case Intrinsic::ppc_altivec_lvsl: @@ -3108,7 +3108,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, printType(Out, I.getType()); Out << ')'; Out << "__builtin_altivec_lvsl(0, (void*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ")"; return true; } @@ -3221,7 +3221,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { DestVal = ResultVals[ValueCount].first; DestValNo = ResultVals[ValueCount].second; } else - DestVal = CI.getOperand(ValueCount-ResultVals.size()+1); + DestVal = CI.getArgOperand(ValueCount-ResultVals.size()); if (I->isEarlyClobber) C = "&"+C; @@ -3255,7 +3255,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { } assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); - Value *SrcVal = CI.getOperand(ValueCount-ResultVals.size()+1); + Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size()); Out << "\"" << C << "\"("; if (!I->isIndirect) diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index 10dc837d90b7..ec2f663908f6 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -34,76 +34,19 @@ def RetCC_SPU : CallingConv<[ //===----------------------------------------------------------------------===// // CellSPU Argument Calling Conventions -// (note: this isn't used, but presumably should be at some point when other -// targets do.) 
//===----------------------------------------------------------------------===// -/* -def CC_SPU : CallingConv<[ - CCIfType<[i8], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[i16], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[i64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[f64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - +def CCC_SPU : CallingConv<[ + CCIfType<[i8, i16, i32, i64, i128, f32, f64, + v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, 
R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, @@ -112,4 +55,3 @@ def CC_SPU : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>> ]>; -*/ diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h index e8ca333f0b69..f511acd64954 100644 --- a/lib/Target/CellSPU/SPUFrameInfo.h +++ b/lib/Target/CellSPU/SPUFrameInfo.h @@ -53,10 +53,6 @@ namespace llvm { static int minStackSize() { return (2 * stackSlotSize()); } - //! Frame size required to spill all registers plus frame info - static int fullSpillSize() { - return (SPURegisterInfo::getNumArgRegs() * stackSlotSize()); - } //! Convert frame index to stack offset static int FItoStackOffset(int frame_index) { return frame_index * stackSlotSize(); diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 9afdb2b97f30..9b8c2ddd0635 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -275,7 +275,6 @@ namespace { SDNode *emitBuildVector(SDNode *bvNode) { EVT vecVT = bvNode->getValueType(0); - EVT eltVT = vecVT.getVectorElementType(); DebugLoc dl = bvNode->getDebugLoc(); // Check to see if this vector can be represented as a CellSPU immediate @@ -606,18 +605,14 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, Base = CurDAG->getTargetConstant(0, N.getValueType()); Index = N; return true; - } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) { + } else if (Opc == ISD::Register + ||Opc == ISD::CopyFromReg + ||Opc == ISD::UNDEF) { unsigned OpOpc = Op->getOpcode(); if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { // Direct load/store without getelementptr - SDValue Addr, Offs; - - // Get the register from CopyFromReg - if (Opc == ISD::CopyFromReg) - Addr = N.getOperand(1); - else - Addr = N; // Register + SDValue Offs; Offs = ((OpOpc == ISD::STORE) ? 
Op->getOperand(3) : Op->getOperand(2)); @@ -626,7 +621,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, Offs = CurDAG->getTargetConstant(0, Offs.getValueType()); Base = Offs; - Index = Addr; + Index = N; return true; } } else { diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 081e8d0db0ee..ece19b9b89f6 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -953,7 +953,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); + SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), + PtrVT, GSDN->getOffset()); const TargetMachine &TM = DAG.getTarget(); SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there is no actual debug info here @@ -1013,22 +1014,26 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, MachineRegisterInfo &RegInfo = MF.getRegInfo(); SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>(); - const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); - const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); - unsigned ArgOffset = SPUFrameInfo::minStackSize(); unsigned ArgRegIdx = 0; unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); + // Add DAG nodes to load the arguments or copy them out of registers. for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; SDValue ArgVal; + CCValAssign &VA = ArgLocs[ArgNo]; - if (ArgRegIdx < NumArgRegs) { + if (VA.isRegLoc()) { const TargetRegisterClass *ArgRegClass; switch (ObjectVT.getSimpleVT().SimpleTy) { @@ -1067,14 +1072,14 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, } unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); + RegInfo.addLiveIn(VA.getLocReg(), VReg); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++ArgRegIdx; } else { // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type // or we're forced to do vararg - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); ArgOffset += StackSlotSize; @@ -1087,16 +1092,31 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // vararg handling: if (isVarArg) { - // unsigned int ptr_size = PtrVT.getSizeInBits() / 8; + // FIXME: we should be able to query the argument registers from + // tablegen generated code. 
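The FIXME above refers to exactly this duplication: the CCC_SPU convention added to SPUCallingConv.td earlier in the patch already lists R3 through R79, yet the vararg path still hard-codes the array that follows. The non-vararg path, by contrast, now drives the TableGen-generated routine, roughly as in this sketch (the free-standing helper is an assumption; the CCState calls mirror the hunk above):

```cpp
#include "llvm/CodeGen/CallingConvLower.h"
// CCC_SPU itself comes from the TableGen output (SPUGenCallingConv.inc),
// plus the usual SelectionDAG headers for ISD::InputArg.
using namespace llvm;

static void analyzeSPUArgs(CallingConv::ID CallConv, bool isVarArg,
                           const TargetMachine &TM, LLVMContext &Ctx,
                           const SmallVectorImpl<ISD::InputArg> &Ins) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, TM, ArgLocs, Ctx);
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); // one CCIfType covers all types
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    if (ArgLocs[i].isRegLoc()) {
      // assigned one of R3..R79; see getLocReg()
    } else {
      // spilled to an 8- or 16-byte aligned stack slot
    }
  }
}
```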
+ static const unsigned ArgRegs[] = { + SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, + SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, + SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, + SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, + SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, + SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, + SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, + SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, + SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, + SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, + SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 + }; + // size of ArgRegs array + unsigned NumArgRegs = 77; + // We will spill (79-3)+1 registers to the stack SmallVector<SDValue, 79-3+1> MemOps; // Create the frame slot - for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { FuncInfo->setVarArgsFrameIndex( - MFI->CreateFixedObject(StackSlotSize, ArgOffset, - true, false)); + MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); @@ -1135,6 +1155,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -1144,8 +1165,15 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); unsigned NumOps = Outs.size(); unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); - const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); - const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); + + const unsigned NumArgRegs = ArgLocs.size(); + // Handy pointer type EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1165,8 +1193,9 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // And the arguments passed on the stack SmallVector<SDValue, 8> MemOpChains; - for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; + for (; ArgRegIdx != NumOps; ++ArgRegIdx) { + SDValue Arg = OutVals[ArgRegIdx]; + CCValAssign &VA = ArgLocs[ArgRegIdx]; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. 
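The surrounding hunks complete an interface split: Outs keeps each outgoing value's flags and types, while the SDValues themselves now arrive in the parallel OutVals array (previously bundled together as Outs[i].Val). SPU's LowerReturn copy loop after this patch reduces to the following; the free-standing wrapper is illustrative:

```cpp
// Sketch of the post-patch LowerReturn body: OutVals[i] carries the SDValue
// whose description lives in Outs[i].
static SDValue copyReturnValues(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
                                SmallVectorImpl<CCValAssign> &RVLocs,
                                const SmallVectorImpl<SDValue> &OutVals) {
  SDValue Flag;
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             OutVals[i], // was Outs[i].Val before this patch
                             Flag);
    Flag = Chain.getValue(1); // chain the copies through the flag
  }
  return Chain;
}
```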
@@ -1180,24 +1209,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case MVT::i32: case MVT::i64: case MVT::i128: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, - false, false, 0)); - ArgOffset += StackSlotSize; - } - break; case MVT::f32: case MVT::f64: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, - false, false, 0)); - ArgOffset += StackSlotSize; - } - break; case MVT::v2i64: case MVT::v2f64: case MVT::v4f32: @@ -1205,7 +1218,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case MVT::v8i16: case MVT::v16i8: if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, false, false, 0)); @@ -1249,7 +1262,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const GlobalValue *GV = G->getGlobal(); EVT CalleeVT = Callee.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); - SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT); if (!ST->usingLargeMem()) { // Turn calls to targets that are defined (i.e., have bodies) into BRSL @@ -1355,6 +1368,7 @@ SDValue SPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; @@ -1376,7 +1390,7 @@ SPUTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); Flag = Chain.getValue(1); } @@ -1746,15 +1760,20 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { unsigned V0Elt = 0; bool monotonic = true; bool rotate = true; + EVT maskVT; // which of the c?d instructions to use if (EltVT == MVT::i8) { V2EltIdx0 = 16; + maskVT = MVT::v16i8; } else if (EltVT == MVT::i16) { V2EltIdx0 = 8; + maskVT = MVT::v8i16; } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { V2EltIdx0 = 4; + maskVT = MVT::v4i32; } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { V2EltIdx0 = 2; + maskVT = MVT::v2i64; } else llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); @@ -1786,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { } else { rotate = false; } - } else if (PrevElt == 0) { + } else if (i == 0) { // First time through, need to keep track of previous element PrevElt = SrcElt; } else { @@ -1798,18 +1817,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (EltsFromV2 == 1 && monotonic) { // Compute mask and shuffle - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - // Initialize temporary register to 0 - SDValue InitTempReg = - DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT)); - // Copy register's contents as index in SHUFFLE_MASK: - SDValue ShufMaskOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, dl, 
MVT::v4i32, - DAG.getTargetConstant(V2Elt, MVT::i32), - DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT)); + + // As SHUFFLE_MASK becomes a c?d instruction, feed it an address + // R1 ($sp) is used here only as it is guaranteed to have last bits zero + SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + DAG.getConstant(V2Elt, MVT::i32)); + SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, + maskVT, Pointer); + // Use shuffle mask in SHUFB synthetic instruction: return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, ShufMaskOp); @@ -2056,14 +2073,19 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); - ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); - assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + // use 0 when the lane to insert to is 'undef' + int64_t Idx=0; + if (IdxOp.getOpcode() != ISD::UNDEF) { + ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); + assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + Idx = (CN->getSExtValue()); + } EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Use $sp ($1) because it's always 16-byte aligned and it's available: SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(CN->getSExtValue(), PtrVT)); + DAG.getConstant(Idx, PtrVT)); SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); SDValue result = @@ -2862,7 +2884,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const case SPUISD::IndirectAddr: { if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); - if (CN != 0 && CN->getZExtValue() == 0) { + if (CN != 0 && CN->isNullValue()) { // (SPUindirect (SPUaform <addr>, 0), 0) -> // (SPUaform <addr>, 0) @@ -3056,12 +3078,10 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, void SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { // Default, for the time being, to the base class handler - TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory, - Ops, DAG); + TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG); } /// isLegalAddressImmediate - Return true if the integer value can be used diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 9ebd442b43c7..6d3c90b7512c 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -134,7 +134,6 @@ namespace llvm { EVT VT) const; void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; @@ -160,6 +159,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -168,6 +168,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 
4c53c988d336..69aa0887bd77 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -164,11 +164,9 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && "invalid SPU OR<type>_<vec> or LR instruction!"); - if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { sourceReg = MI.getOperand(1).getReg(); destReg = MI.getOperand(0).getReg(); return true; - } break; } case SPU::ORv16i8: @@ -251,40 +249,18 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const +void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { // We support cross register class moves for our aliases, such as R3 in any // reg class to any other reg class containing R3. This is required because // we instruction select bitconvert i64 -> f64 as a noop for example, so our // types have no specific meaning. - if (DestRC == SPU::R8CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R16CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr16), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R32CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr32), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R32FPRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRf32), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R64CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr64), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R64FPRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRf64), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::GPRCRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr128), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::VECREGRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRv16i8), DestReg).addReg(SrcReg); - } else { - // Attempt to copy unknown/unsupported register class! - return false; - } - - return true; + BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } void @@ -356,88 +332,6 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx); } -//! Return true if the specified load or store can be folded -bool -SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { - if (Ops.size() != 1) return false; - - // Make sure this is a reg-reg copy. - unsigned Opc = MI->getOpcode(); - - switch (Opc) { - case SPU::ORv16i8: - case SPU::ORv8i16: - case SPU::ORv4i32: - case SPU::ORv2i64: - case SPU::ORr8: - case SPU::ORr16: - case SPU::ORr32: - case SPU::ORr64: - case SPU::ORf32: - case SPU::ORf64: - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) - return true; - break; - } - - return false; -} - -/// foldMemoryOperand - SPU, like PPC, can only fold spills into -/// copy instructions, turning them into load/store instructions. 
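The deletion below removes SPU's spill-folding hooks, which only ever fired on self-aliased OR moves; with copyPhysReg in place the generic spilling machinery covers them. For orientation before the removed block, the core of the discarded transformation was the "move becomes store" rewrite, condensed here (addFrameReference is the target-local builder helper; the wrapper is illustrative):

```cpp
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

// Condensed from the deleted foldMemoryOperandImpl: STQDr32 was chosen only
// while the frame index stayed within D-form addressing reach (the deleted
// code checked SPUFrameInfo::maxFrameOffset()).
static MachineInstr *foldMoveToStore(MachineFunction &MF, MachineInstr *MI,
                                     int FrameIndex,
                                     const TargetInstrInfo &TII) {
  unsigned InReg = MI->getOperand(1).getReg();
  bool isKill   = MI->getOperand(1).isKill();
  bool isUndef  = MI->getOperand(1).isUndef();
  MachineInstrBuilder MIB =
      BuildMI(MF, MI->getDebugLoc(), TII.get(SPU::STQDr32));
  MIB.addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef));
  return addFrameReference(MIB, FrameIndex);
}
```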
-MachineInstr * -SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const -{ - if (Ops.size() != 1) return 0; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = 0; - - switch (Opc) { - case SPU::ORv16i8: - case SPU::ORv8i16: - case SPU::ORv4i32: - case SPU::ORv2i64: - case SPU::ORr8: - case SPU::ORr16: - case SPU::ORr32: - case SPU::ORr64: - case SPU::ORf32: - case SPU::ORf64: - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - if (FrameIndex < SPUFrameInfo::maxFrameOffset()) { - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), - get(SPU::STQDr32)); - - MIB.addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)); - NewMI = addFrameReference(MIB, FrameIndex); - } - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)); - - MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)); - Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset()) - ? SPU::STQDr32 : SPU::STQXr32; - NewMI = addFrameReference(MIB, FrameIndex); - break; - } - } - - return NewMI; -} - //! Branch analysis /*! \note This code was kiped from PPC. There may be more branch analysis for @@ -554,9 +448,8 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -566,14 +459,14 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (FBB == 0) { if (Cond.empty()) { // Unconditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR)); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(SPU::BR)); MIB.addMBB(TBB); DEBUG(errs() << "Inserted one-way uncond branch: "); DEBUG((*MIB).dump()); } else { // Conditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm())); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); MIB.addReg(Cond[1].getReg()).addMBB(TBB); DEBUG(errs() << "Inserted one-way cond branch: "); @@ -581,8 +474,8 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } return 1; } else { - MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm())); - MachineInstrBuilder MIB2 = BuildMI(&MBB, dl, get(SPU::BR)); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR)); // Two-way Conditional Branch. 
MIB.addReg(Cond[1].getReg()).addMBB(TBB); diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index 6dabd7c27273..fbb173318148 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -23,19 +23,6 @@ namespace llvm { class SPUInstrInfo : public TargetInstrInfoImpl { SPUTargetMachine &TM; const SPURegisterInfo RI; - protected: - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - public: explicit SPUInstrInfo(SPUTargetMachine &tm); @@ -56,12 +43,10 @@ namespace llvm { unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; //! Store a register to a stack slot, based on its register class. virtual void storeRegToStackSlot(MachineBasicBlock &MBB, @@ -77,11 +62,6 @@ namespace llvm { const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - //! Return true if the specified load or store can be folded - virtual - bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - //! Reverses a branch's condition, returning false on success. virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; @@ -94,8 +74,9 @@ namespace llvm { virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; }; } diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 846c7edc02a2..647da3051d3d 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -21,7 +21,7 @@ def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>; def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq, [SDNPHasChain, SDNPOutFlag]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; //===----------------------------------------------------------------------===// // Operand constraints: //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index d8937ec5745c..f7cfa42f2a95 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -191,33 +191,6 @@ SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, { } -// SPU's 128-bit registers used for argument passing: -static const unsigned SPU_ArgRegs[] = { - SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, - SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, - SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, - SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, - 
SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, - SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, - SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, - SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, - SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, - SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, - SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 -}; - -const unsigned * -SPURegisterInfo::getArgRegs() -{ - return SPU_ArgRegs; -} - -unsigned -SPURegisterInfo::getNumArgRegs() -{ - return sizeof(SPU_ArgRegs) / sizeof(SPU_ArgRegs[0]); -} - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * @@ -251,36 +224,6 @@ SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const return SPU_CalleeSaveRegs; } -const TargetRegisterClass* const* -SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const -{ - // Cell ABI Calling Convention - static const TargetRegisterClass * const SPU_CalleeSaveRegClasses[] = { - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, /* environment pointer */ - &SPU::GPRCRegClass, /* stack pointer */ - &SPU::GPRCRegClass, /* link register */ - 0 /* end */ - }; - - return SPU_CalleeSaveRegClasses; -} - /*! R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is generally unused) are the Cell's reserved registers diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 0a7031835696..7a6ae6d43c7e 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -49,10 +49,6 @@ namespace llvm { //! Return the array of callee-saved registers virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const; - //! Return the register class array of the callee-saved registers - virtual const TargetRegisterClass* const * - getCalleeSavedRegClasses(const MachineFunction *MF) const; - //! Allow for scavenging, so we can get scratch registers when needed. virtual bool requiresRegisterScavenging(const MachineFunction &MF) const { return true; } @@ -90,15 +86,6 @@ namespace llvm { // New methods added: //------------------------------------------------------------------------ - //! Return the array of argument passing registers - /*! - \note The size of this array is returned by getArgRegsSize(). - */ - static const unsigned *getArgRegs(); - - //! 
Return the size of the argument passing register array - static unsigned getNumArgRegs(); - //! Get DWARF debugging register number int getDwarfRegNum(unsigned RegNum, bool isEH) const; diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 45a0c84a4f01..145568adcd4a 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -99,11 +99,12 @@ namespace { ValueSet DefinedValues; ForwardRefMap ForwardRefs; bool is_inline; + unsigned indent_level; public: static char ID; explicit CppWriter(formatted_raw_ostream &o) : - ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {} + ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} virtual const char *getPassName() const { return "C++ backend"; } @@ -120,6 +121,11 @@ namespace { void error(const std::string& msg); + + formatted_raw_ostream& nl(formatted_raw_ostream &Out, int delta = 0); + inline void in() { indent_level++; } + inline void out() { if (indent_level >0) indent_level--; } + private: void printLinkageType(GlobalValue::LinkageTypes LT); void printVisibilityType(GlobalValue::VisibilityTypes VisTypes); @@ -153,1857 +159,1856 @@ namespace { void printModuleBody(); }; +} // end anonymous namespace. + +formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) { + Out << '\n'; + if (delta >= 0 || indent_level >= unsigned(-delta)) + indent_level += delta; + Out.indent(indent_level); + return Out; +} + +static inline void sanitize(std::string &str) { + for (size_t i = 0; i < str.length(); ++i) + if (!isalnum(str[i]) && str[i] != '_') + str[i] = '_'; +} - static unsigned indent_level = 0; - inline formatted_raw_ostream& nl(formatted_raw_ostream& Out, int delta = 0) { - Out << "\n"; - if (delta >= 0 || indent_level >= unsigned(-delta)) - indent_level += delta; - for (unsigned i = 0; i < indent_level; ++i) - Out << " "; - return Out; +static std::string getTypePrefix(const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return "void_"; + case Type::IntegerTyID: + return "int" + utostr(cast<IntegerType>(Ty)->getBitWidth()) + "_"; + case Type::FloatTyID: return "float_"; + case Type::DoubleTyID: return "double_"; + case Type::LabelTyID: return "label_"; + case Type::FunctionTyID: return "func_"; + case Type::StructTyID: return "struct_"; + case Type::ArrayTyID: return "array_"; + case Type::PointerTyID: return "ptr_"; + case Type::VectorTyID: return "packed_"; + case Type::OpaqueTyID: return "opaque_"; + default: return "other_"; } + return "unknown_"; +} - inline void in() { indent_level++; } - inline void out() { if (indent_level >0) indent_level--; } +// Looks up the type in the symbol table and returns a pointer to its name or +// a null pointer if it wasn't found. Note that this isn't the same as the +// Mode::getTypeName function which will return an empty string, not a null +// pointer if the name is not found. 
+static const std::string * +findTypeName(const TypeSymbolTable& ST, const Type* Ty) { + TypeSymbolTable::const_iterator TI = ST.begin(); + TypeSymbolTable::const_iterator TE = ST.end(); + for (;TI != TE; ++TI) + if (TI->second == Ty) + return &(TI->first); + return 0; +} - inline void - sanitize(std::string& str) { - for (size_t i = 0; i < str.length(); ++i) - if (!isalnum(str[i]) && str[i] != '_') - str[i] = '_'; - } +void CppWriter::error(const std::string& msg) { + report_fatal_error(msg); +} - inline std::string - getTypePrefix(const Type* Ty ) { - switch (Ty->getTypeID()) { - case Type::VoidTyID: return "void_"; - case Type::IntegerTyID: - return std::string("int") + utostr(cast<IntegerType>(Ty)->getBitWidth()) + - "_"; - case Type::FloatTyID: return "float_"; - case Type::DoubleTyID: return "double_"; - case Type::LabelTyID: return "label_"; - case Type::FunctionTyID: return "func_"; - case Type::StructTyID: return "struct_"; - case Type::ArrayTyID: return "array_"; - case Type::PointerTyID: return "ptr_"; - case Type::VectorTyID: return "packed_"; - case Type::OpaqueTyID: return "opaque_"; - default: return "other_"; - } - return "unknown_"; - } - - // Looks up the type in the symbol table and returns a pointer to its name or - // a null pointer if it wasn't found. Note that this isn't the same as the - // Mode::getTypeName function which will return an empty string, not a null - // pointer if the name is not found. - inline const std::string* - findTypeName(const TypeSymbolTable& ST, const Type* Ty) { - TypeSymbolTable::const_iterator TI = ST.begin(); - TypeSymbolTable::const_iterator TE = ST.end(); - for (;TI != TE; ++TI) - if (TI->second == Ty) - return &(TI->first); - return 0; - } - - void CppWriter::error(const std::string& msg) { - report_fatal_error(msg); - } - - // printCFP - Print a floating point constant .. very carefully :) - // This makes sure that conversion to/from floating yields the same binary - // result so that we don't lose precision. - void CppWriter::printCFP(const ConstantFP *CFP) { - bool ignored; - APFloat APF = APFloat(CFP->getValueAPF()); // copy - if (CFP->getType() == Type::getFloatTy(CFP->getContext())) - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "ConstantFP::get(mod->getContext(), "; - Out << "APFloat("; +// printCFP - Print a floating point constant .. very carefully :) +// This makes sure that conversion to/from floating yields the same binary +// result so that we don't lose precision. 
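printCFP's strategy, restated compactly: emit decimal text only when it parses back to the identical constant, otherwise emit the raw bit pattern so the generated C++ rebuilds the value exactly. A standalone, LLVM-independent illustration of that bit-exact round trip:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// 0.1 has no exact binary representation, so its bits, not its decimal
// spelling, are what reproduce it exactly in generated code.
int main() {
  double d = 0.1;
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof d); // well-defined type pun
  std::printf("BitsToDouble(0x%llXULL) /* %g */\n",
              (unsigned long long)bits, d); // matches printCFP's output form
  return 0;
}
```

The reindented implementation follows.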
+void CppWriter::printCFP(const ConstantFP *CFP) { + bool ignored; + APFloat APF = APFloat(CFP->getValueAPF()); // copy + if (CFP->getType() == Type::getFloatTy(CFP->getContext())) + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); + Out << "ConstantFP::get(mod->getContext(), "; + Out << "APFloat("; #if HAVE_PRINTF_A - char Buffer[100]; - sprintf(Buffer, "%A", APF.convertToDouble()); - if ((!strncmp(Buffer, "0x", 2) || - !strncmp(Buffer, "-0x", 3) || - !strncmp(Buffer, "+0x", 3)) && - APF.bitwiseIsEqual(APFloat(atof(Buffer)))) { - if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) - Out << "BitsToDouble(" << Buffer << ")"; - else - Out << "BitsToFloat((float)" << Buffer << ")"; - Out << ")"; - } else { + char Buffer[100]; + sprintf(Buffer, "%A", APF.convertToDouble()); + if ((!strncmp(Buffer, "0x", 2) || + !strncmp(Buffer, "-0x", 3) || + !strncmp(Buffer, "+0x", 3)) && + APF.bitwiseIsEqual(APFloat(atof(Buffer)))) { + if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + Out << "BitsToDouble(" << Buffer << ")"; + else + Out << "BitsToFloat((float)" << Buffer << ")"; + Out << ")"; + } else { #endif - std::string StrVal = ftostr(CFP->getValueAPF()); - - while (StrVal[0] == ' ') - StrVal.erase(StrVal.begin()); - - // Check to make sure that the stringized number is not some string like - // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex. - if (((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) && - (CFP->isExactlyValue(atof(StrVal.c_str())))) { - if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) - Out << StrVal; - else - Out << StrVal << "f"; - } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) - Out << "BitsToDouble(0x" - << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue()) - << "ULL) /* " << StrVal << " */"; + std::string StrVal = ftostr(CFP->getValueAPF()); + + while (StrVal[0] == ' ') + StrVal.erase(StrVal.begin()); + + // Check to make sure that the stringized number is not some string like + // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex. + if (((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) && + (CFP->isExactlyValue(atof(StrVal.c_str())))) { + if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + Out << StrVal; else - Out << "BitsToFloat(0x" - << utohexstr((uint32_t)CFP->getValueAPF(). - bitcastToAPInt().getZExtValue()) - << "U) /* " << StrVal << " */"; - Out << ")"; + Out << StrVal << "f"; + } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + Out << "BitsToDouble(0x" + << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue()) + << "ULL) /* " << StrVal << " */"; + else + Out << "BitsToFloat(0x" + << utohexstr((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue()) + << "U) /* " << StrVal << " */"; + Out << ")"; #if HAVE_PRINTF_A - } + } #endif - Out << ")"; + Out << ")"; +} + +void CppWriter::printCallingConv(CallingConv::ID cc){ + // Print the calling convention. + switch (cc) { + case CallingConv::C: Out << "CallingConv::C"; break; + case CallingConv::Fast: Out << "CallingConv::Fast"; break; + case CallingConv::Cold: Out << "CallingConv::Cold"; break; + case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break; + default: Out << cc; break; } +} - void CppWriter::printCallingConv(CallingConv::ID cc){ - // Print the calling convention. 
- switch (cc) { - case CallingConv::C: Out << "CallingConv::C"; break; - case CallingConv::Fast: Out << "CallingConv::Fast"; break; - case CallingConv::Cold: Out << "CallingConv::Cold"; break; - case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break; - default: Out << cc; break; - } +void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { + switch (LT) { + case GlobalValue::InternalLinkage: + Out << "GlobalValue::InternalLinkage"; break; + case GlobalValue::PrivateLinkage: + Out << "GlobalValue::PrivateLinkage"; break; + case GlobalValue::LinkerPrivateLinkage: + Out << "GlobalValue::LinkerPrivateLinkage"; break; + case GlobalValue::LinkerPrivateWeakLinkage: + Out << "GlobalValue::LinkerPrivateWeakLinkage"; break; + case GlobalValue::AvailableExternallyLinkage: + Out << "GlobalValue::AvailableExternallyLinkage "; break; + case GlobalValue::LinkOnceAnyLinkage: + Out << "GlobalValue::LinkOnceAnyLinkage "; break; + case GlobalValue::LinkOnceODRLinkage: + Out << "GlobalValue::LinkOnceODRLinkage "; break; + case GlobalValue::WeakAnyLinkage: + Out << "GlobalValue::WeakAnyLinkage"; break; + case GlobalValue::WeakODRLinkage: + Out << "GlobalValue::WeakODRLinkage"; break; + case GlobalValue::AppendingLinkage: + Out << "GlobalValue::AppendingLinkage"; break; + case GlobalValue::ExternalLinkage: + Out << "GlobalValue::ExternalLinkage"; break; + case GlobalValue::DLLImportLinkage: + Out << "GlobalValue::DLLImportLinkage"; break; + case GlobalValue::DLLExportLinkage: + Out << "GlobalValue::DLLExportLinkage"; break; + case GlobalValue::ExternalWeakLinkage: + Out << "GlobalValue::ExternalWeakLinkage"; break; + case GlobalValue::CommonLinkage: + Out << "GlobalValue::CommonLinkage"; break; } +} - void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { - switch (LT) { - case GlobalValue::InternalLinkage: - Out << "GlobalValue::InternalLinkage"; break; - case GlobalValue::PrivateLinkage: - Out << "GlobalValue::PrivateLinkage"; break; - case GlobalValue::LinkerPrivateLinkage: - Out << "GlobalValue::LinkerPrivateLinkage"; break; - case GlobalValue::AvailableExternallyLinkage: - Out << "GlobalValue::AvailableExternallyLinkage "; break; - case GlobalValue::LinkOnceAnyLinkage: - Out << "GlobalValue::LinkOnceAnyLinkage "; break; - case GlobalValue::LinkOnceODRLinkage: - Out << "GlobalValue::LinkOnceODRLinkage "; break; - case GlobalValue::WeakAnyLinkage: - Out << "GlobalValue::WeakAnyLinkage"; break; - case GlobalValue::WeakODRLinkage: - Out << "GlobalValue::WeakODRLinkage"; break; - case GlobalValue::AppendingLinkage: - Out << "GlobalValue::AppendingLinkage"; break; - case GlobalValue::ExternalLinkage: - Out << "GlobalValue::ExternalLinkage"; break; - case GlobalValue::DLLImportLinkage: - Out << "GlobalValue::DLLImportLinkage"; break; - case GlobalValue::DLLExportLinkage: - Out << "GlobalValue::DLLExportLinkage"; break; - case GlobalValue::ExternalWeakLinkage: - Out << "GlobalValue::ExternalWeakLinkage"; break; - case GlobalValue::CommonLinkage: - Out << "GlobalValue::CommonLinkage"; break; - } +void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { + switch (VisType) { + default: llvm_unreachable("Unknown GVar visibility"); + case GlobalValue::DefaultVisibility: + Out << "GlobalValue::DefaultVisibility"; + break; + case GlobalValue::HiddenVisibility: + Out << "GlobalValue::HiddenVisibility"; + break; + case GlobalValue::ProtectedVisibility: + Out << "GlobalValue::ProtectedVisibility"; + break; } +} - void 
CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { - switch (VisType) { - default: llvm_unreachable("Unknown GVar visibility"); - case GlobalValue::DefaultVisibility: - Out << "GlobalValue::DefaultVisibility"; - break; - case GlobalValue::HiddenVisibility: - Out << "GlobalValue::HiddenVisibility"; - break; - case GlobalValue::ProtectedVisibility: - Out << "GlobalValue::ProtectedVisibility"; - break; +// printEscapedString - Print each character of the specified string, escaping +// it if it is not printable or if it is an escape char. +void CppWriter::printEscapedString(const std::string &Str) { + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + unsigned char C = Str[i]; + if (isprint(C) && C != '"' && C != '\\') { + Out << C; + } else { + Out << "\\x" + << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')) + << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); } } +} - // printEscapedString - Print each character of the specified string, escaping - // it if it is not printable or if it is an escape char. - void CppWriter::printEscapedString(const std::string &Str) { - for (unsigned i = 0, e = Str.size(); i != e; ++i) { - unsigned char C = Str[i]; - if (isprint(C) && C != '"' && C != '\\') { - Out << C; - } else { - Out << "\\x" - << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A')) - << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A')); - } +std::string CppWriter::getCppName(const Type* Ty) { + // First, handle the primitive types .. easy + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())"; + case Type::IntegerTyID: { + unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth(); + return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")"; + } + case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())"; + case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())"; + case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())"; + case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())"; + default: + error("Invalid primitive type"); + break; } + // shouldn't be returned, but make it sensible + return "Type::getVoidTy(mod->getContext())"; } - std::string CppWriter::getCppName(const Type* Ty) { - // First, handle the primitive types .. easy - if (Ty->isPrimitiveType() || Ty->isIntegerTy()) { - switch (Ty->getTypeID()) { - case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())"; - case Type::IntegerTyID: { - unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth(); - return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")"; - } - case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())"; - case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())"; - case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())"; - case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())"; - default: - error("Invalid primitive type"); - break; - } - // shouldn't be returned, but make it sensible - return "Type::getVoidTy(mod->getContext())"; - } + // Now, see if we've seen the type before and return that + TypeMap::iterator I = TypeNames.find(Ty); + if (I != TypeNames.end()) + return I->second; + + // Okay, let's build a new name for this type. 
Start with a prefix + const char* prefix = 0; + switch (Ty->getTypeID()) { + case Type::FunctionTyID: prefix = "FuncTy_"; break; + case Type::StructTyID: prefix = "StructTy_"; break; + case Type::ArrayTyID: prefix = "ArrayTy_"; break; + case Type::PointerTyID: prefix = "PointerTy_"; break; + case Type::OpaqueTyID: prefix = "OpaqueTy_"; break; + case Type::VectorTyID: prefix = "VectorTy_"; break; + default: prefix = "OtherTy_"; break; // prevent breakage + } - // Now, see if we've seen the type before and return that - TypeMap::iterator I = TypeNames.find(Ty); - if (I != TypeNames.end()) - return I->second; + // See if the type has a name in the symboltable and build accordingly + const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty); + std::string name; + if (tName) + name = std::string(prefix) + *tName; + else + name = std::string(prefix) + utostr(uniqueNum++); + sanitize(name); + + // Save the name + return TypeNames[Ty] = name; +} - // Okay, let's build a new name for this type. Start with a prefix - const char* prefix = 0; - switch (Ty->getTypeID()) { - case Type::FunctionTyID: prefix = "FuncTy_"; break; - case Type::StructTyID: prefix = "StructTy_"; break; - case Type::ArrayTyID: prefix = "ArrayTy_"; break; - case Type::PointerTyID: prefix = "PointerTy_"; break; - case Type::OpaqueTyID: prefix = "OpaqueTy_"; break; - case Type::VectorTyID: prefix = "VectorTy_"; break; - default: prefix = "OtherTy_"; break; // prevent breakage - } +void CppWriter::printCppName(const Type* Ty) { + printEscapedString(getCppName(Ty)); +} - // See if the type has a name in the symboltable and build accordingly - const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty); - std::string name; - if (tName) - name = std::string(prefix) + *tName; - else - name = std::string(prefix) + utostr(uniqueNum++); - sanitize(name); - - // Save the name - return TypeNames[Ty] = name; - } - - void CppWriter::printCppName(const Type* Ty) { - printEscapedString(getCppName(Ty)); - } - - std::string CppWriter::getCppName(const Value* val) { - std::string name; - ValueMap::iterator I = ValueNames.find(val); - if (I != ValueNames.end() && I->first == val) - return I->second; - - if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) { - name = std::string("gvar_") + - getTypePrefix(GV->getType()->getElementType()); - } else if (isa<Function>(val)) { - name = std::string("func_"); - } else if (const Constant* C = dyn_cast<Constant>(val)) { - name = std::string("const_") + getTypePrefix(C->getType()); - } else if (const Argument* Arg = dyn_cast<Argument>(val)) { - if (is_inline) { - unsigned argNum = std::distance(Arg->getParent()->arg_begin(), - Function::const_arg_iterator(Arg)) + 1; - name = std::string("arg_") + utostr(argNum); - NameSet::iterator NI = UsedNames.find(name); - if (NI != UsedNames.end()) - name += std::string("_") + utostr(uniqueNum++); - UsedNames.insert(name); - return ValueNames[val] = name; - } else { - name = getTypePrefix(val->getType()); - } +std::string CppWriter::getCppName(const Value* val) { + std::string name; + ValueMap::iterator I = ValueNames.find(val); + if (I != ValueNames.end() && I->first == val) + return I->second; + + if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) { + name = std::string("gvar_") + + getTypePrefix(GV->getType()->getElementType()); + } else if (isa<Function>(val)) { + name = std::string("func_"); + } else if (const Constant* C = dyn_cast<Constant>(val)) { + name = std::string("const_") + 
getTypePrefix(C->getType()); + } else if (const Argument* Arg = dyn_cast<Argument>(val)) { + if (is_inline) { + unsigned argNum = std::distance(Arg->getParent()->arg_begin(), + Function::const_arg_iterator(Arg)) + 1; + name = std::string("arg_") + utostr(argNum); + NameSet::iterator NI = UsedNames.find(name); + if (NI != UsedNames.end()) + name += std::string("_") + utostr(uniqueNum++); + UsedNames.insert(name); + return ValueNames[val] = name; } else { name = getTypePrefix(val->getType()); } - if (val->hasName()) - name += val->getName(); - else - name += utostr(uniqueNum++); - sanitize(name); - NameSet::iterator NI = UsedNames.find(name); - if (NI != UsedNames.end()) - name += std::string("_") + utostr(uniqueNum++); - UsedNames.insert(name); - return ValueNames[val] = name; + } else { + name = getTypePrefix(val->getType()); } + if (val->hasName()) + name += val->getName(); + else + name += utostr(uniqueNum++); + sanitize(name); + NameSet::iterator NI = UsedNames.find(name); + if (NI != UsedNames.end()) + name += std::string("_") + utostr(uniqueNum++); + UsedNames.insert(name); + return ValueNames[val] = name; +} - void CppWriter::printCppName(const Value* val) { - printEscapedString(getCppName(val)); - } +void CppWriter::printCppName(const Value* val) { + printEscapedString(getCppName(val)); +} - void CppWriter::printAttributes(const AttrListPtr &PAL, - const std::string &name) { - Out << "AttrListPtr " << name << "_PAL;"; - nl(Out); - if (!PAL.isEmpty()) { - Out << '{'; in(); nl(Out); - Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out); - Out << "AttributeWithIndex PAWI;"; nl(Out); - for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { - unsigned index = PAL.getSlot(i).Index; - Attributes attrs = PAL.getSlot(i).Attrs; - Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 "; +void CppWriter::printAttributes(const AttrListPtr &PAL, + const std::string &name) { + Out << "AttrListPtr " << name << "_PAL;"; + nl(Out); + if (!PAL.isEmpty()) { + Out << '{'; in(); nl(Out); + Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out); + Out << "AttributeWithIndex PAWI;"; nl(Out); + for (unsigned i = 0; i < PAL.getNumSlots(); ++i) { + unsigned index = PAL.getSlot(i).Index; + Attributes attrs = PAL.getSlot(i).Attrs; + Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 "; #define HANDLE_ATTR(X) \ - if (attrs & Attribute::X) \ - Out << " | Attribute::" #X; \ - attrs &= ~Attribute::X; - - HANDLE_ATTR(SExt); - HANDLE_ATTR(ZExt); - HANDLE_ATTR(NoReturn); - HANDLE_ATTR(InReg); - HANDLE_ATTR(StructRet); - HANDLE_ATTR(NoUnwind); - HANDLE_ATTR(NoAlias); - HANDLE_ATTR(ByVal); - HANDLE_ATTR(Nest); - HANDLE_ATTR(ReadNone); - HANDLE_ATTR(ReadOnly); - HANDLE_ATTR(InlineHint); - HANDLE_ATTR(NoInline); - HANDLE_ATTR(AlwaysInline); - HANDLE_ATTR(OptimizeForSize); - HANDLE_ATTR(StackProtect); - HANDLE_ATTR(StackProtectReq); - HANDLE_ATTR(NoCapture); + if (attrs & Attribute::X) \ + Out << " | Attribute::" #X; \ + attrs &= ~Attribute::X; + + HANDLE_ATTR(SExt); + HANDLE_ATTR(ZExt); + HANDLE_ATTR(NoReturn); + HANDLE_ATTR(InReg); + HANDLE_ATTR(StructRet); + HANDLE_ATTR(NoUnwind); + HANDLE_ATTR(NoAlias); + HANDLE_ATTR(ByVal); + HANDLE_ATTR(Nest); + HANDLE_ATTR(ReadNone); + HANDLE_ATTR(ReadOnly); + HANDLE_ATTR(InlineHint); + HANDLE_ATTR(NoInline); + HANDLE_ATTR(AlwaysInline); + HANDLE_ATTR(OptimizeForSize); + HANDLE_ATTR(StackProtect); + HANDLE_ATTR(StackProtectReq); + HANDLE_ATTR(NoCapture); #undef HANDLE_ATTR - assert(attrs == 0 && "Unhandled attribute!"); - Out << ";"; - nl(Out); - Out << 
"Attrs.push_back(PAWI);"; - nl(Out); - } - Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());"; + assert(attrs == 0 && "Unhandled attribute!"); + Out << ";"; + nl(Out); + Out << "Attrs.push_back(PAWI);"; nl(Out); - out(); nl(Out); - Out << '}'; nl(Out); } + Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());"; + nl(Out); + out(); nl(Out); + Out << '}'; nl(Out); } +} - bool CppWriter::printTypeInternal(const Type* Ty) { - // We don't print definitions for primitive types - if (Ty->isPrimitiveType() || Ty->isIntegerTy()) - return false; - - // If we already defined this type, we don't need to define it again. - if (DefinedTypes.find(Ty) != DefinedTypes.end()) - return false; - - // Everything below needs the name for the type so get it now. - std::string typeName(getCppName(Ty)); - - // Search the type stack for recursion. If we find it, then generate this - // as an OpaqueType, but make sure not to do this multiple times because - // the type could appear in multiple places on the stack. Once the opaque - // definition is issued, it must not be re-issued. Consequently we have to - // check the UnresolvedTypes list as well. - TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(), - Ty); - if (TI != TypeStack.end()) { - TypeMap::const_iterator I = UnresolvedTypes.find(Ty); - if (I == UnresolvedTypes.end()) { - Out << "PATypeHolder " << typeName; - Out << "_fwd = OpaqueType::get(mod->getContext());"; - nl(Out); - UnresolvedTypes[Ty] = typeName; - } - return true; - } +bool CppWriter::printTypeInternal(const Type* Ty) { + // We don't print definitions for primitive types + if (Ty->isPrimitiveType() || Ty->isIntegerTy()) + return false; - // We're going to print a derived type which, by definition, contains other - // types. So, push this one we're printing onto the type stack to assist with - // recursive definitions. - TypeStack.push_back(Ty); + // If we already defined this type, we don't need to define it again. + if (DefinedTypes.find(Ty) != DefinedTypes.end()) + return false; - // Print the type definition - switch (Ty->getTypeID()) { - case Type::FunctionTyID: { - const FunctionType* FT = cast<FunctionType>(Ty); - Out << "std::vector<const Type*>" << typeName << "_args;"; + // Everything below needs the name for the type so get it now. + std::string typeName(getCppName(Ty)); + + // Search the type stack for recursion. If we find it, then generate this + // as an OpaqueType, but make sure not to do this multiple times because + // the type could appear in multiple places on the stack. Once the opaque + // definition is issued, it must not be re-issued. Consequently we have to + // check the UnresolvedTypes list as well. 
+ TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(), + Ty); + if (TI != TypeStack.end()) { + TypeMap::const_iterator I = UnresolvedTypes.find(Ty); + if (I == UnresolvedTypes.end()) { + Out << "PATypeHolder " << typeName; + Out << "_fwd = OpaqueType::get(mod->getContext());"; nl(Out); - FunctionType::param_iterator PI = FT->param_begin(); - FunctionType::param_iterator PE = FT->param_end(); - for (; PI != PE; ++PI) { - const Type* argTy = static_cast<const Type*>(*PI); - bool isForward = printTypeInternal(argTy); - std::string argName(getCppName(argTy)); - Out << typeName << "_args.push_back(" << argName; - if (isForward) - Out << "_fwd"; - Out << ");"; - nl(Out); - } - bool isForward = printTypeInternal(FT->getReturnType()); - std::string retTypeName(getCppName(FT->getReturnType())); - Out << "FunctionType* " << typeName << " = FunctionType::get("; - in(); nl(Out) << "/*Result=*/" << retTypeName; + UnresolvedTypes[Ty] = typeName; + } + return true; + } + + // We're going to print a derived type which, by definition, contains other + // types. So, push this one we're printing onto the type stack to assist with + // recursive definitions. + TypeStack.push_back(Ty); + + // Print the type definition + switch (Ty->getTypeID()) { + case Type::FunctionTyID: { + const FunctionType* FT = cast<FunctionType>(Ty); + Out << "std::vector<const Type*>" << typeName << "_args;"; + nl(Out); + FunctionType::param_iterator PI = FT->param_begin(); + FunctionType::param_iterator PE = FT->param_end(); + for (; PI != PE; ++PI) { + const Type* argTy = static_cast<const Type*>(*PI); + bool isForward = printTypeInternal(argTy); + std::string argName(getCppName(argTy)); + Out << typeName << "_args.push_back(" << argName; if (isForward) Out << "_fwd"; - Out << ","; - nl(Out) << "/*Params=*/" << typeName << "_args,"; - nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");"; - out(); - nl(Out); - break; - } - case Type::StructTyID: { - const StructType* ST = cast<StructType>(Ty); - Out << "std::vector<const Type*>" << typeName << "_fields;"; - nl(Out); - StructType::element_iterator EI = ST->element_begin(); - StructType::element_iterator EE = ST->element_end(); - for (; EI != EE; ++EI) { - const Type* fieldTy = static_cast<const Type*>(*EI); - bool isForward = printTypeInternal(fieldTy); - std::string fieldName(getCppName(fieldTy)); - Out << typeName << "_fields.push_back(" << fieldName; - if (isForward) - Out << "_fwd"; - Out << ");"; - nl(Out); - } - Out << "StructType* " << typeName << " = StructType::get(" - << "mod->getContext(), " - << typeName << "_fields, /*isPacked=*/" - << (ST->isPacked() ? "true" : "false") << ");"; - nl(Out); - break; - } - case Type::ArrayTyID: { - const ArrayType* AT = cast<ArrayType>(Ty); - const Type* ET = AT->getElementType(); - bool isForward = printTypeInternal(ET); - std::string elemName(getCppName(ET)); - Out << "ArrayType* " << typeName << " = ArrayType::get(" - << elemName << (isForward ? "_fwd" : "") - << ", " << utostr(AT->getNumElements()) << ");"; - nl(Out); - break; - } - case Type::PointerTyID: { - const PointerType* PT = cast<PointerType>(Ty); - const Type* ET = PT->getElementType(); - bool isForward = printTypeInternal(ET); - std::string elemName(getCppName(ET)); - Out << "PointerType* " << typeName << " = PointerType::get(" - << elemName << (isForward ? 
"_fwd" : "") - << ", " << utostr(PT->getAddressSpace()) << ");"; - nl(Out); - break; - } - case Type::VectorTyID: { - const VectorType* PT = cast<VectorType>(Ty); - const Type* ET = PT->getElementType(); - bool isForward = printTypeInternal(ET); - std::string elemName(getCppName(ET)); - Out << "VectorType* " << typeName << " = VectorType::get(" - << elemName << (isForward ? "_fwd" : "") - << ", " << utostr(PT->getNumElements()) << ");"; - nl(Out); - break; - } - case Type::OpaqueTyID: { - Out << "OpaqueType* " << typeName; - Out << " = OpaqueType::get(mod->getContext());"; + Out << ");"; nl(Out); - break; - } - default: - error("Invalid TypeID"); } - - // If the type had a name, make sure we recreate it. - const std::string* progTypeName = - findTypeName(TheModule->getTypeSymbolTable(),Ty); - if (progTypeName) { - Out << "mod->addTypeName(\"" << *progTypeName << "\", " - << typeName << ");"; + bool isForward = printTypeInternal(FT->getReturnType()); + std::string retTypeName(getCppName(FT->getReturnType())); + Out << "FunctionType* " << typeName << " = FunctionType::get("; + in(); nl(Out) << "/*Result=*/" << retTypeName; + if (isForward) + Out << "_fwd"; + Out << ","; + nl(Out) << "/*Params=*/" << typeName << "_args,"; + nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");"; + out(); + nl(Out); + break; + } + case Type::StructTyID: { + const StructType* ST = cast<StructType>(Ty); + Out << "std::vector<const Type*>" << typeName << "_fields;"; + nl(Out); + StructType::element_iterator EI = ST->element_begin(); + StructType::element_iterator EE = ST->element_end(); + for (; EI != EE; ++EI) { + const Type* fieldTy = static_cast<const Type*>(*EI); + bool isForward = printTypeInternal(fieldTy); + std::string fieldName(getCppName(fieldTy)); + Out << typeName << "_fields.push_back(" << fieldName; + if (isForward) + Out << "_fwd"; + Out << ");"; nl(Out); } + Out << "StructType* " << typeName << " = StructType::get(" + << "mod->getContext(), " + << typeName << "_fields, /*isPacked=*/" + << (ST->isPacked() ? "true" : "false") << ");"; + nl(Out); + break; + } + case Type::ArrayTyID: { + const ArrayType* AT = cast<ArrayType>(Ty); + const Type* ET = AT->getElementType(); + bool isForward = printTypeInternal(ET); + std::string elemName(getCppName(ET)); + Out << "ArrayType* " << typeName << " = ArrayType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(AT->getNumElements()) << ");"; + nl(Out); + break; + } + case Type::PointerTyID: { + const PointerType* PT = cast<PointerType>(Ty); + const Type* ET = PT->getElementType(); + bool isForward = printTypeInternal(ET); + std::string elemName(getCppName(ET)); + Out << "PointerType* " << typeName << " = PointerType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(PT->getAddressSpace()) << ");"; + nl(Out); + break; + } + case Type::VectorTyID: { + const VectorType* PT = cast<VectorType>(Ty); + const Type* ET = PT->getElementType(); + bool isForward = printTypeInternal(ET); + std::string elemName(getCppName(ET)); + Out << "VectorType* " << typeName << " = VectorType::get(" + << elemName << (isForward ? "_fwd" : "") + << ", " << utostr(PT->getNumElements()) << ");"; + nl(Out); + break; + } + case Type::OpaqueTyID: { + Out << "OpaqueType* " << typeName; + Out << " = OpaqueType::get(mod->getContext());"; + nl(Out); + break; + } + default: + error("Invalid TypeID"); + } - // Pop us off the type stack - TypeStack.pop_back(); + // If the type had a name, make sure we recreate it. 
+ const std::string* progTypeName = + findTypeName(TheModule->getTypeSymbolTable(),Ty); + if (progTypeName) { + Out << "mod->addTypeName(\"" << *progTypeName << "\", " + << typeName << ");"; + nl(Out); + } - // Indicate that this type is now defined. - DefinedTypes.insert(Ty); + // Pop us off the type stack + TypeStack.pop_back(); - // Early resolve as many unresolved types as possible. Search the unresolved - // types map for the type we just printed. Now that its definition is complete - // we can resolve any previous references to it. This prevents a cascade of - // unresolved types. - TypeMap::iterator I = UnresolvedTypes.find(Ty); - if (I != UnresolvedTypes.end()) { - Out << "cast<OpaqueType>(" << I->second - << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");"; - nl(Out); - Out << I->second << " = cast<"; - switch (Ty->getTypeID()) { - case Type::FunctionTyID: Out << "FunctionType"; break; - case Type::ArrayTyID: Out << "ArrayType"; break; - case Type::StructTyID: Out << "StructType"; break; - case Type::VectorTyID: Out << "VectorType"; break; - case Type::PointerTyID: Out << "PointerType"; break; - case Type::OpaqueTyID: Out << "OpaqueType"; break; - default: Out << "NoSuchDerivedType"; break; - } - Out << ">(" << I->second << "_fwd.get());"; - nl(Out); nl(Out); - UnresolvedTypes.erase(I); - } + // Indicate that this type is now defined. + DefinedTypes.insert(Ty); - // Finally, separate the type definition from other with a newline. + // Early resolve as many unresolved types as possible. Search the unresolved + // types map for the type we just printed. Now that its definition is complete + // we can resolve any previous references to it. This prevents a cascade of + // unresolved types. + TypeMap::iterator I = UnresolvedTypes.find(Ty); + if (I != UnresolvedTypes.end()) { + Out << "cast<OpaqueType>(" << I->second + << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");"; nl(Out); - - // We weren't a recursive type - return false; + Out << I->second << " = cast<"; + switch (Ty->getTypeID()) { + case Type::FunctionTyID: Out << "FunctionType"; break; + case Type::ArrayTyID: Out << "ArrayType"; break; + case Type::StructTyID: Out << "StructType"; break; + case Type::VectorTyID: Out << "VectorType"; break; + case Type::PointerTyID: Out << "PointerType"; break; + case Type::OpaqueTyID: Out << "OpaqueType"; break; + default: Out << "NoSuchDerivedType"; break; + } + Out << ">(" << I->second << "_fwd.get());"; + nl(Out); nl(Out); + UnresolvedTypes.erase(I); } - // Prints a type definition. Returns true if it could not resolve all the - // types in the definition but had to use a forward reference. 
- void CppWriter::printType(const Type* Ty) { - assert(TypeStack.empty()); - TypeStack.clear(); - printTypeInternal(Ty); - assert(TypeStack.empty()); - } - - void CppWriter::printTypes(const Module* M) { - // Walk the symbol table and print out all its types - const TypeSymbolTable& symtab = M->getTypeSymbolTable(); - for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end(); - TI != TE; ++TI) { - - // For primitive types and types already defined, just add a name - TypeMap::const_iterator TNI = TypeNames.find(TI->second); - if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || - TNI != TypeNames.end()) { - Out << "mod->addTypeName(\""; - printEscapedString(TI->first); - Out << "\", " << getCppName(TI->second) << ");"; - nl(Out); - // For everything else, define the type - } else { - printType(TI->second); - } - } + // Finally, separate the type definition from other with a newline. + nl(Out); - // Add all of the global variables to the value table... - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) { - if (I->hasInitializer()) - printType(I->getInitializer()->getType()); - printType(I->getType()); + // We weren't a recursive type + return false; +} + +// Prints a type definition. Returns true if it could not resolve all the +// types in the definition but had to use a forward reference. +void CppWriter::printType(const Type* Ty) { + assert(TypeStack.empty()); + TypeStack.clear(); + printTypeInternal(Ty); + assert(TypeStack.empty()); +} + +void CppWriter::printTypes(const Module* M) { + // Walk the symbol table and print out all its types + const TypeSymbolTable& symtab = M->getTypeSymbolTable(); + for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end(); + TI != TE; ++TI) { + + // For primitive types and types already defined, just add a name + TypeMap::const_iterator TNI = TypeNames.find(TI->second); + if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() || + TNI != TypeNames.end()) { + Out << "mod->addTypeName(\""; + printEscapedString(TI->first); + Out << "\", " << getCppName(TI->second) << ");"; + nl(Out); + // For everything else, define the type + } else { + printType(TI->second); } + } - // Add all the functions to the table - for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); - FI != FE; ++FI) { - printType(FI->getReturnType()); - printType(FI->getFunctionType()); - // Add all the function arguments - for (Function::const_arg_iterator AI = FI->arg_begin(), - AE = FI->arg_end(); AI != AE; ++AI) { - printType(AI->getType()); - } + // Add all of the global variables to the value table... 
+ for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + if (I->hasInitializer()) + printType(I->getInitializer()->getType()); + printType(I->getType()); + } - // Add all of the basic blocks and instructions - for (Function::const_iterator BB = FI->begin(), - E = FI->end(); BB != E; ++BB) { - printType(BB->getType()); - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; - ++I) { - printType(I->getType()); - for (unsigned i = 0; i < I->getNumOperands(); ++i) - printType(I->getOperand(i)->getType()); - } + // Add all the functions to the table + for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + printType(FI->getReturnType()); + printType(FI->getFunctionType()); + // Add all the function arguments + for (Function::const_arg_iterator AI = FI->arg_begin(), + AE = FI->arg_end(); AI != AE; ++AI) { + printType(AI->getType()); + } + + // Add all of the basic blocks and instructions + for (Function::const_iterator BB = FI->begin(), + E = FI->end(); BB != E; ++BB) { + printType(BB->getType()); + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; + ++I) { + printType(I->getType()); + for (unsigned i = 0; i < I->getNumOperands(); ++i) + printType(I->getOperand(i)->getType()); } } } +} - // printConstant - Print out a constant pool entry... - void CppWriter::printConstant(const Constant *CV) { - // First, if the constant is actually a GlobalValue (variable or function) - // or its already in the constant list then we've printed it already and we - // can just return. - if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end()) - return; +// printConstant - Print out a constant pool entry... +void CppWriter::printConstant(const Constant *CV) { + // First, if the constant is actually a GlobalValue (variable or function) + // or its already in the constant list then we've printed it already and we + // can just return. 
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end()) + return; - std::string constName(getCppName(CV)); - std::string typeName(getCppName(CV->getType())); + std::string constName(getCppName(CV)); + std::string typeName(getCppName(CV->getType())); - if (isa<GlobalValue>(CV)) { - // Skip variables and functions, we emit them elsewhere - return; - } + if (isa<GlobalValue>(CV)) { + // Skip variables and functions, we emit them elsewhere + return; + } - if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - std::string constValue = CI->getValue().toString(10, true); - Out << "ConstantInt* " << constName - << " = ConstantInt::get(mod->getContext(), APInt(" - << cast<IntegerType>(CI->getType())->getBitWidth() - << ", StringRef(\"" << constValue << "\"), 10));"; - } else if (isa<ConstantAggregateZero>(CV)) { - Out << "ConstantAggregateZero* " << constName - << " = ConstantAggregateZero::get(" << typeName << ");"; - } else if (isa<ConstantPointerNull>(CV)) { - Out << "ConstantPointerNull* " << constName - << " = ConstantPointerNull::get(" << typeName << ");"; - } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - Out << "ConstantFP* " << constName << " = "; - printCFP(CFP); - Out << ";"; - } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) { - if (CA->isString() && - CA->getType()->getElementType() == - Type::getInt8Ty(CA->getContext())) { - Out << "Constant* " << constName << - " = ConstantArray::get(mod->getContext(), \""; - std::string tmp = CA->getAsString(); - bool nullTerminate = false; - if (tmp[tmp.length()-1] == 0) { - tmp.erase(tmp.length()-1); - nullTerminate = true; - } - printEscapedString(tmp); - // Determine if we want null termination or not. - if (nullTerminate) - Out << "\", true"; // Indicate that the null terminator should be - // added. 
- else - Out << "\", false";// No null terminator - Out << ");"; - } else { - Out << "std::vector<Constant*> " << constName << "_elems;"; - nl(Out); - unsigned N = CA->getNumOperands(); - for (unsigned i = 0; i < N; ++i) { - printConstant(CA->getOperand(i)); // recurse to print operands - Out << constName << "_elems.push_back(" - << getCppName(CA->getOperand(i)) << ");"; - nl(Out); - } - Out << "Constant* " << constName << " = ConstantArray::get(" - << typeName << ", " << constName << "_elems);"; - } - } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) { - Out << "std::vector<Constant*> " << constName << "_fields;"; - nl(Out); - unsigned N = CS->getNumOperands(); - for (unsigned i = 0; i < N; i++) { - printConstant(CS->getOperand(i)); - Out << constName << "_fields.push_back(" - << getCppName(CS->getOperand(i)) << ");"; - nl(Out); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + std::string constValue = CI->getValue().toString(10, true); + Out << "ConstantInt* " << constName + << " = ConstantInt::get(mod->getContext(), APInt(" + << cast<IntegerType>(CI->getType())->getBitWidth() + << ", StringRef(\"" << constValue << "\"), 10));"; + } else if (isa<ConstantAggregateZero>(CV)) { + Out << "ConstantAggregateZero* " << constName + << " = ConstantAggregateZero::get(" << typeName << ");"; + } else if (isa<ConstantPointerNull>(CV)) { + Out << "ConstantPointerNull* " << constName + << " = ConstantPointerNull::get(" << typeName << ");"; + } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + Out << "ConstantFP* " << constName << " = "; + printCFP(CFP); + Out << ";"; + } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) { + if (CA->isString() && + CA->getType()->getElementType() == + Type::getInt8Ty(CA->getContext())) { + Out << "Constant* " << constName << + " = ConstantArray::get(mod->getContext(), \""; + std::string tmp = CA->getAsString(); + bool nullTerminate = false; + if (tmp[tmp.length()-1] == 0) { + tmp.erase(tmp.length()-1); + nullTerminate = true; } - Out << "Constant* " << constName << " = ConstantStruct::get(" - << typeName << ", " << constName << "_fields);"; - } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { + printEscapedString(tmp); + // Determine if we want null termination or not. + if (nullTerminate) + Out << "\", true"; // Indicate that the null terminator should be + // added. 
+ else + Out << "\", false";// No null terminator + Out << ");"; + } else { Out << "std::vector<Constant*> " << constName << "_elems;"; nl(Out); - unsigned N = CP->getNumOperands(); + unsigned N = CA->getNumOperands(); for (unsigned i = 0; i < N; ++i) { - printConstant(CP->getOperand(i)); + printConstant(CA->getOperand(i)); // recurse to print operands Out << constName << "_elems.push_back(" - << getCppName(CP->getOperand(i)) << ");"; + << getCppName(CA->getOperand(i)) << ");"; nl(Out); } - Out << "Constant* " << constName << " = ConstantVector::get(" + Out << "Constant* " << constName << " = ConstantArray::get(" << typeName << ", " << constName << "_elems);"; - } else if (isa<UndefValue>(CV)) { - Out << "UndefValue* " << constName << " = UndefValue::get(" - << typeName << ");"; - } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { - if (CE->getOpcode() == Instruction::GetElementPtr) { - Out << "std::vector<Constant*> " << constName << "_indices;"; + } + } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) { + Out << "std::vector<Constant*> " << constName << "_fields;"; + nl(Out); + unsigned N = CS->getNumOperands(); + for (unsigned i = 0; i < N; i++) { + printConstant(CS->getOperand(i)); + Out << constName << "_fields.push_back(" + << getCppName(CS->getOperand(i)) << ");"; + nl(Out); + } + Out << "Constant* " << constName << " = ConstantStruct::get(" + << typeName << ", " << constName << "_fields);"; + } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { + Out << "std::vector<Constant*> " << constName << "_elems;"; + nl(Out); + unsigned N = CP->getNumOperands(); + for (unsigned i = 0; i < N; ++i) { + printConstant(CP->getOperand(i)); + Out << constName << "_elems.push_back(" + << getCppName(CP->getOperand(i)) << ");"; + nl(Out); + } + Out << "Constant* " << constName << " = ConstantVector::get(" + << typeName << ", " << constName << "_elems);"; + } else if (isa<UndefValue>(CV)) { + Out << "UndefValue* " << constName << " = UndefValue::get(" + << typeName << ");"; + } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + Out << "std::vector<Constant*> " << constName << "_indices;"; + nl(Out); + printConstant(CE->getOperand(0)); + for (unsigned i = 1; i < CE->getNumOperands(); ++i ) { + printConstant(CE->getOperand(i)); + Out << constName << "_indices.push_back(" + << getCppName(CE->getOperand(i)) << ");"; nl(Out); - printConstant(CE->getOperand(0)); - for (unsigned i = 1; i < CE->getNumOperands(); ++i ) { - printConstant(CE->getOperand(i)); - Out << constName << "_indices.push_back(" - << getCppName(CE->getOperand(i)) << ");"; - nl(Out); - } - Out << "Constant* " << constName - << " = ConstantExpr::getGetElementPtr(" - << getCppName(CE->getOperand(0)) << ", " - << "&" << constName << "_indices[0], " - << constName << "_indices.size()" - << ");"; - } else if (CE->isCast()) { - printConstant(CE->getOperand(0)); - Out << "Constant* " << constName << " = ConstantExpr::getCast("; - switch (CE->getOpcode()) { - default: llvm_unreachable("Invalid cast opcode"); - case Instruction::Trunc: Out << "Instruction::Trunc"; break; - case Instruction::ZExt: Out << "Instruction::ZExt"; break; - case Instruction::SExt: Out << "Instruction::SExt"; break; - case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break; - case Instruction::FPExt: Out << "Instruction::FPExt"; break; - case Instruction::FPToUI: Out << "Instruction::FPToUI"; break; - case Instruction::FPToSI: Out << 
"Instruction::FPToSI"; break; - case Instruction::UIToFP: Out << "Instruction::UIToFP"; break; - case Instruction::SIToFP: Out << "Instruction::SIToFP"; break; - case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break; - case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break; - case Instruction::BitCast: Out << "Instruction::BitCast"; break; - } - Out << ", " << getCppName(CE->getOperand(0)) << ", " - << getCppName(CE->getType()) << ");"; - } else { - unsigned N = CE->getNumOperands(); - for (unsigned i = 0; i < N; ++i ) { - printConstant(CE->getOperand(i)); + } + Out << "Constant* " << constName + << " = ConstantExpr::getGetElementPtr(" + << getCppName(CE->getOperand(0)) << ", " + << "&" << constName << "_indices[0], " + << constName << "_indices.size()" + << ");"; + } else if (CE->isCast()) { + printConstant(CE->getOperand(0)); + Out << "Constant* " << constName << " = ConstantExpr::getCast("; + switch (CE->getOpcode()) { + default: llvm_unreachable("Invalid cast opcode"); + case Instruction::Trunc: Out << "Instruction::Trunc"; break; + case Instruction::ZExt: Out << "Instruction::ZExt"; break; + case Instruction::SExt: Out << "Instruction::SExt"; break; + case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break; + case Instruction::FPExt: Out << "Instruction::FPExt"; break; + case Instruction::FPToUI: Out << "Instruction::FPToUI"; break; + case Instruction::FPToSI: Out << "Instruction::FPToSI"; break; + case Instruction::UIToFP: Out << "Instruction::UIToFP"; break; + case Instruction::SIToFP: Out << "Instruction::SIToFP"; break; + case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break; + case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break; + case Instruction::BitCast: Out << "Instruction::BitCast"; break; + } + Out << ", " << getCppName(CE->getOperand(0)) << ", " + << getCppName(CE->getType()) << ");"; + } else { + unsigned N = CE->getNumOperands(); + for (unsigned i = 0; i < N; ++i ) { + printConstant(CE->getOperand(i)); + } + Out << "Constant* " << constName << " = ConstantExpr::"; + switch (CE->getOpcode()) { + case Instruction::Add: Out << "getAdd("; break; + case Instruction::FAdd: Out << "getFAdd("; break; + case Instruction::Sub: Out << "getSub("; break; + case Instruction::FSub: Out << "getFSub("; break; + case Instruction::Mul: Out << "getMul("; break; + case Instruction::FMul: Out << "getFMul("; break; + case Instruction::UDiv: Out << "getUDiv("; break; + case Instruction::SDiv: Out << "getSDiv("; break; + case Instruction::FDiv: Out << "getFDiv("; break; + case Instruction::URem: Out << "getURem("; break; + case Instruction::SRem: Out << "getSRem("; break; + case Instruction::FRem: Out << "getFRem("; break; + case Instruction::And: Out << "getAnd("; break; + case Instruction::Or: Out << "getOr("; break; + case Instruction::Xor: Out << "getXor("; break; + case Instruction::ICmp: + Out << "getICmp(ICmpInst::ICMP_"; + switch (CE->getPredicate()) { + case ICmpInst::ICMP_EQ: Out << "EQ"; break; + case ICmpInst::ICMP_NE: Out << "NE"; break; + case ICmpInst::ICMP_SLT: Out << "SLT"; break; + case ICmpInst::ICMP_ULT: Out << "ULT"; break; + case ICmpInst::ICMP_SGT: Out << "SGT"; break; + case ICmpInst::ICMP_UGT: Out << "UGT"; break; + case ICmpInst::ICMP_SLE: Out << "SLE"; break; + case ICmpInst::ICMP_ULE: Out << "ULE"; break; + case ICmpInst::ICMP_SGE: Out << "SGE"; break; + case ICmpInst::ICMP_UGE: Out << "UGE"; break; + default: error("Invalid ICmp Predicate"); } - Out << "Constant* " << constName << " = ConstantExpr::"; - switch 
(CE->getOpcode()) { - case Instruction::Add: Out << "getAdd("; break; - case Instruction::FAdd: Out << "getFAdd("; break; - case Instruction::Sub: Out << "getSub("; break; - case Instruction::FSub: Out << "getFSub("; break; - case Instruction::Mul: Out << "getMul("; break; - case Instruction::FMul: Out << "getFMul("; break; - case Instruction::UDiv: Out << "getUDiv("; break; - case Instruction::SDiv: Out << "getSDiv("; break; - case Instruction::FDiv: Out << "getFDiv("; break; - case Instruction::URem: Out << "getURem("; break; - case Instruction::SRem: Out << "getSRem("; break; - case Instruction::FRem: Out << "getFRem("; break; - case Instruction::And: Out << "getAnd("; break; - case Instruction::Or: Out << "getOr("; break; - case Instruction::Xor: Out << "getXor("; break; - case Instruction::ICmp: - Out << "getICmp(ICmpInst::ICMP_"; - switch (CE->getPredicate()) { - case ICmpInst::ICMP_EQ: Out << "EQ"; break; - case ICmpInst::ICMP_NE: Out << "NE"; break; - case ICmpInst::ICMP_SLT: Out << "SLT"; break; - case ICmpInst::ICMP_ULT: Out << "ULT"; break; - case ICmpInst::ICMP_SGT: Out << "SGT"; break; - case ICmpInst::ICMP_UGT: Out << "UGT"; break; - case ICmpInst::ICMP_SLE: Out << "SLE"; break; - case ICmpInst::ICMP_ULE: Out << "ULE"; break; - case ICmpInst::ICMP_SGE: Out << "SGE"; break; - case ICmpInst::ICMP_UGE: Out << "UGE"; break; - default: error("Invalid ICmp Predicate"); - } - break; - case Instruction::FCmp: - Out << "getFCmp(FCmpInst::FCMP_"; - switch (CE->getPredicate()) { - case FCmpInst::FCMP_FALSE: Out << "FALSE"; break; - case FCmpInst::FCMP_ORD: Out << "ORD"; break; - case FCmpInst::FCMP_UNO: Out << "UNO"; break; - case FCmpInst::FCMP_OEQ: Out << "OEQ"; break; - case FCmpInst::FCMP_UEQ: Out << "UEQ"; break; - case FCmpInst::FCMP_ONE: Out << "ONE"; break; - case FCmpInst::FCMP_UNE: Out << "UNE"; break; - case FCmpInst::FCMP_OLT: Out << "OLT"; break; - case FCmpInst::FCMP_ULT: Out << "ULT"; break; - case FCmpInst::FCMP_OGT: Out << "OGT"; break; - case FCmpInst::FCMP_UGT: Out << "UGT"; break; - case FCmpInst::FCMP_OLE: Out << "OLE"; break; - case FCmpInst::FCMP_ULE: Out << "ULE"; break; - case FCmpInst::FCMP_OGE: Out << "OGE"; break; - case FCmpInst::FCMP_UGE: Out << "UGE"; break; - case FCmpInst::FCMP_TRUE: Out << "TRUE"; break; - default: error("Invalid FCmp Predicate"); - } - break; - case Instruction::Shl: Out << "getShl("; break; - case Instruction::LShr: Out << "getLShr("; break; - case Instruction::AShr: Out << "getAShr("; break; - case Instruction::Select: Out << "getSelect("; break; - case Instruction::ExtractElement: Out << "getExtractElement("; break; - case Instruction::InsertElement: Out << "getInsertElement("; break; - case Instruction::ShuffleVector: Out << "getShuffleVector("; break; - default: - error("Invalid constant expression"); - break; + break; + case Instruction::FCmp: + Out << "getFCmp(FCmpInst::FCMP_"; + switch (CE->getPredicate()) { + case FCmpInst::FCMP_FALSE: Out << "FALSE"; break; + case FCmpInst::FCMP_ORD: Out << "ORD"; break; + case FCmpInst::FCMP_UNO: Out << "UNO"; break; + case FCmpInst::FCMP_OEQ: Out << "OEQ"; break; + case FCmpInst::FCMP_UEQ: Out << "UEQ"; break; + case FCmpInst::FCMP_ONE: Out << "ONE"; break; + case FCmpInst::FCMP_UNE: Out << "UNE"; break; + case FCmpInst::FCMP_OLT: Out << "OLT"; break; + case FCmpInst::FCMP_ULT: Out << "ULT"; break; + case FCmpInst::FCMP_OGT: Out << "OGT"; break; + case FCmpInst::FCMP_UGT: Out << "UGT"; break; + case FCmpInst::FCMP_OLE: Out << "OLE"; break; + case FCmpInst::FCMP_ULE: Out << "ULE"; break; + 
case FCmpInst::FCMP_OGE: Out << "OGE"; break; + case FCmpInst::FCMP_UGE: Out << "UGE"; break; + case FCmpInst::FCMP_TRUE: Out << "TRUE"; break; + default: error("Invalid FCmp Predicate"); } - Out << getCppName(CE->getOperand(0)); - for (unsigned i = 1; i < CE->getNumOperands(); ++i) - Out << ", " << getCppName(CE->getOperand(i)); - Out << ");"; + break; + case Instruction::Shl: Out << "getShl("; break; + case Instruction::LShr: Out << "getLShr("; break; + case Instruction::AShr: Out << "getAShr("; break; + case Instruction::Select: Out << "getSelect("; break; + case Instruction::ExtractElement: Out << "getExtractElement("; break; + case Instruction::InsertElement: Out << "getInsertElement("; break; + case Instruction::ShuffleVector: Out << "getShuffleVector("; break; + default: + error("Invalid constant expression"); + break; } - } else { - error("Bad Constant"); - Out << "Constant* " << constName << " = 0; "; + Out << getCppName(CE->getOperand(0)); + for (unsigned i = 1; i < CE->getNumOperands(); ++i) + Out << ", " << getCppName(CE->getOperand(i)); + Out << ");"; } - nl(Out); + } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) { + Out << "Constant* " << constName << " = "; + Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");"; + } else { + error("Bad Constant"); + Out << "Constant* " << constName << " = 0; "; } + nl(Out); +} - void CppWriter::printConstants(const Module* M) { - // Traverse all the global variables looking for constant initializers - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) - if (I->hasInitializer()) - printConstant(I->getInitializer()); - - // Traverse the LLVM functions looking for constants - for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); - FI != FE; ++FI) { - // Add all of the basic blocks and instructions - for (Function::const_iterator BB = FI->begin(), - E = FI->end(); BB != E; ++BB) { - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; - ++I) { - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) { - printConstant(C); - } +void CppWriter::printConstants(const Module* M) { + // Traverse all the global variables looking for constant initializers + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) + if (I->hasInitializer()) + printConstant(I->getInitializer()); + + // Traverse the LLVM functions looking for constants + for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + // Add all of the basic blocks and instructions + for (Function::const_iterator BB = FI->begin(), + E = FI->end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; + ++I) { + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) { + printConstant(C); } } } } } +} - void CppWriter::printVariableUses(const GlobalVariable *GV) { - nl(Out) << "// Type Definitions"; - nl(Out); - printType(GV->getType()); - if (GV->hasInitializer()) { - Constant *Init = GV->getInitializer(); - printType(Init->getType()); - if (Function *F = dyn_cast<Function>(Init)) { - nl(Out)<< "/ Function Declarations"; nl(Out); - printFunctionHead(F); - } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { - nl(Out) << "// Global Variable Declarations"; nl(Out); - printVariableHead(gv); - - nl(Out) << "// Global Variable 
Definitions"; nl(Out); - printVariableBody(gv); - } else { - nl(Out) << "// Constant Definitions"; nl(Out); - printConstant(Init); - } +void CppWriter::printVariableUses(const GlobalVariable *GV) { + nl(Out) << "// Type Definitions"; + nl(Out); + printType(GV->getType()); + if (GV->hasInitializer()) { + Constant *Init = GV->getInitializer(); + printType(Init->getType()); + if (Function *F = dyn_cast<Function>(Init)) { + nl(Out)<< "/ Function Declarations"; nl(Out); + printFunctionHead(F); + } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) { + nl(Out) << "// Global Variable Declarations"; nl(Out); + printVariableHead(gv); + + nl(Out) << "// Global Variable Definitions"; nl(Out); + printVariableBody(gv); + } else { + nl(Out) << "// Constant Definitions"; nl(Out); + printConstant(Init); } } +} - void CppWriter::printVariableHead(const GlobalVariable *GV) { - nl(Out) << "GlobalVariable* " << getCppName(GV); - if (is_inline) { - Out << " = mod->getGlobalVariable(mod->getContext(), "; - printEscapedString(GV->getName()); - Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; - nl(Out) << "if (!" << getCppName(GV) << ") {"; - in(); nl(Out) << getCppName(GV); - } - Out << " = new GlobalVariable(/*Module=*/*mod, "; - nl(Out) << "/*Type=*/"; - printCppName(GV->getType()->getElementType()); - Out << ","; - nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false"); - Out << ","; - nl(Out) << "/*Linkage=*/"; - printLinkageType(GV->getLinkage()); - Out << ","; - nl(Out) << "/*Initializer=*/0, "; - if (GV->hasInitializer()) { - Out << "// has initializer, specified below"; - } - nl(Out) << "/*Name=*/\""; +void CppWriter::printVariableHead(const GlobalVariable *GV) { + nl(Out) << "GlobalVariable* " << getCppName(GV); + if (is_inline) { + Out << " = mod->getGlobalVariable(mod->getContext(), "; printEscapedString(GV->getName()); + Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)"; + nl(Out) << "if (!" 
<< getCppName(GV) << ") {"; + in(); nl(Out) << getCppName(GV); + } + Out << " = new GlobalVariable(/*Module=*/*mod, "; + nl(Out) << "/*Type=*/"; + printCppName(GV->getType()->getElementType()); + Out << ","; + nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false"); + Out << ","; + nl(Out) << "/*Linkage=*/"; + printLinkageType(GV->getLinkage()); + Out << ","; + nl(Out) << "/*Initializer=*/0, "; + if (GV->hasInitializer()) { + Out << "// has initializer, specified below"; + } + nl(Out) << "/*Name=*/\""; + printEscapedString(GV->getName()); + Out << "\");"; + nl(Out); + + if (GV->hasSection()) { + printCppName(GV); + Out << "->setSection(\""; + printEscapedString(GV->getSection()); Out << "\");"; nl(Out); - - if (GV->hasSection()) { - printCppName(GV); - Out << "->setSection(\""; - printEscapedString(GV->getSection()); - Out << "\");"; - nl(Out); - } - if (GV->getAlignment()) { - printCppName(GV); - Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");"; - nl(Out); - } - if (GV->getVisibility() != GlobalValue::DefaultVisibility) { - printCppName(GV); - Out << "->setVisibility("; - printVisibilityType(GV->getVisibility()); - Out << ");"; - nl(Out); - } - if (GV->isThreadLocal()) { - printCppName(GV); - Out << "->setThreadLocal(true);"; - nl(Out); - } - if (is_inline) { - out(); Out << "}"; nl(Out); - } } - - void CppWriter::printVariableBody(const GlobalVariable *GV) { - if (GV->hasInitializer()) { - printCppName(GV); - Out << "->setInitializer("; - Out << getCppName(GV->getInitializer()) << ");"; - nl(Out); - } + if (GV->getAlignment()) { + printCppName(GV); + Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");"; + nl(Out); } + if (GV->getVisibility() != GlobalValue::DefaultVisibility) { + printCppName(GV); + Out << "->setVisibility("; + printVisibilityType(GV->getVisibility()); + Out << ");"; + nl(Out); + } + if (GV->isThreadLocal()) { + printCppName(GV); + Out << "->setThreadLocal(true);"; + nl(Out); + } + if (is_inline) { + out(); Out << "}"; nl(Out); + } +} - std::string CppWriter::getOpName(Value* V) { - if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end()) - return getCppName(V); - - // See if its alread in the map of forward references, if so just return the - // name we already set up for it - ForwardRefMap::const_iterator I = ForwardRefs.find(V); - if (I != ForwardRefs.end()) - return I->second; - - // This is a new forward reference. Generate a unique name for it - std::string result(std::string("fwdref_") + utostr(uniqueNum++)); - - // Yes, this is a hack. An Argument is the smallest instantiable value that - // we can make as a placeholder for the real value. We'll replace these - // Argument instances later. - Out << "Argument* " << result << " = new Argument(" - << getCppName(V->getType()) << ");"; +void CppWriter::printVariableBody(const GlobalVariable *GV) { + if (GV->hasInitializer()) { + printCppName(GV); + Out << "->setInitializer("; + Out << getCppName(GV->getInitializer()) << ");"; nl(Out); - ForwardRefs[V] = result; - return result; } +} - // printInstruction - This member is called for each Instruction in a function. - void CppWriter::printInstruction(const Instruction *I, - const std::string& bbname) { - std::string iName(getCppName(I)); +std::string CppWriter::getOpName(Value* V) { + if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end()) + return getCppName(V); - // Before we emit this instruction, we need to take care of generating any - // forward references. 
So, we get the names of all the operands in advance - const unsigned Ops(I->getNumOperands()); - std::string* opNames = new std::string[Ops]; - for (unsigned i = 0; i < Ops; i++) { - opNames[i] = getOpName(I->getOperand(i)); - } + // See if its alread in the map of forward references, if so just return the + // name we already set up for it + ForwardRefMap::const_iterator I = ForwardRefs.find(V); + if (I != ForwardRefs.end()) + return I->second; - switch (I->getOpcode()) { - default: - error("Invalid instruction"); - break; + // This is a new forward reference. Generate a unique name for it + std::string result(std::string("fwdref_") + utostr(uniqueNum++)); - case Instruction::Ret: { - const ReturnInst* ret = cast<ReturnInst>(I); - Out << "ReturnInst::Create(mod->getContext(), " - << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");"; - break; + // Yes, this is a hack. An Argument is the smallest instantiable value that + // we can make as a placeholder for the real value. We'll replace these + // Argument instances later. + Out << "Argument* " << result << " = new Argument(" + << getCppName(V->getType()) << ");"; + nl(Out); + ForwardRefs[V] = result; + return result; +} + +// printInstruction - This member is called for each Instruction in a function. +void CppWriter::printInstruction(const Instruction *I, + const std::string& bbname) { + std::string iName(getCppName(I)); + + // Before we emit this instruction, we need to take care of generating any + // forward references. So, we get the names of all the operands in advance + const unsigned Ops(I->getNumOperands()); + std::string* opNames = new std::string[Ops]; + for (unsigned i = 0; i < Ops; i++) + opNames[i] = getOpName(I->getOperand(i)); + + switch (I->getOpcode()) { + default: + error("Invalid instruction"); + break; + + case Instruction::Ret: { + const ReturnInst* ret = cast<ReturnInst>(I); + Out << "ReturnInst::Create(mod->getContext(), " + << (ret->getReturnValue() ? 
opNames[0] + ", " : "") << bbname << ");"; + break; + } + case Instruction::Br: { + const BranchInst* br = cast<BranchInst>(I); + Out << "BranchInst::Create(" ; + if (br->getNumOperands() == 3) { + Out << opNames[2] << ", " + << opNames[1] << ", " + << opNames[0] << ", "; + + } else if (br->getNumOperands() == 1) { + Out << opNames[0] << ", "; + } else { + error("Branch with 2 operands?"); } - case Instruction::Br: { - const BranchInst* br = cast<BranchInst>(I); - Out << "BranchInst::Create(" ; - if (br->getNumOperands() == 3 ) { - Out << opNames[2] << ", " - << opNames[1] << ", " - << opNames[0] << ", "; - - } else if (br->getNumOperands() == 1) { - Out << opNames[0] << ", "; - } else { - error("Branch with 2 operands?"); - } - Out << bbname << ");"; - break; + Out << bbname << ");"; + break; + } + case Instruction::Switch: { + const SwitchInst *SI = cast<SwitchInst>(I); + Out << "SwitchInst* " << iName << " = SwitchInst::Create(" + << opNames[0] << ", " + << opNames[1] << ", " + << SI->getNumCases() << ", " << bbname << ");"; + nl(Out); + for (unsigned i = 2; i != SI->getNumOperands(); i += 2) { + Out << iName << "->addCase(" + << opNames[i] << ", " + << opNames[i+1] << ");"; + nl(Out); } - case Instruction::Switch: { - const SwitchInst *SI = cast<SwitchInst>(I); - Out << "SwitchInst* " << iName << " = SwitchInst::Create(" - << opNames[0] << ", " - << opNames[1] << ", " - << SI->getNumCases() << ", " << bbname << ");"; + break; + } + case Instruction::IndirectBr: { + const IndirectBrInst *IBI = cast<IndirectBrInst>(I); + Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create(" + << opNames[0] << ", " << IBI->getNumDestinations() << ");"; + nl(Out); + for (unsigned i = 1; i != IBI->getNumOperands(); ++i) { + Out << iName << "->addDestination(" << opNames[i] << ");"; nl(Out); - for (unsigned i = 2; i != SI->getNumOperands(); i += 2) { - Out << iName << "->addCase(" - << opNames[i] << ", " - << opNames[i+1] << ");"; - nl(Out); - } - break; } - case Instruction::IndirectBr: { - const IndirectBrInst *IBI = cast<IndirectBrInst>(I); - Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create(" - << opNames[0] << ", " << IBI->getNumDestinations() << ");"; + break; + } + case Instruction::Invoke: { + const InvokeInst* inv = cast<InvokeInst>(I); + Out << "std::vector<Value*> " << iName << "_params;"; + nl(Out); + for (unsigned i = 0; i < inv->getNumArgOperands(); ++i) { + Out << iName << "_params.push_back(" + << getOpName(inv->getArgOperand(i)) << ");"; nl(Out); - for (unsigned i = 1; i != IBI->getNumOperands(); ++i) { - Out << iName << "->addDestination(" << opNames[i] << ");"; - nl(Out); - } - break; } - case Instruction::Invoke: { - const InvokeInst* inv = cast<InvokeInst>(I); - Out << "std::vector<Value*> " << iName << "_params;"; + // FIXME: This shouldn't use magic numbers -3, -2, and -1. 
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create(" + << getOpName(inv->getCalledFunction()) << ", " + << getOpName(inv->getNormalDest()) << ", " + << getOpName(inv->getUnwindDest()) << ", " + << iName << "_params.begin(), " + << iName << "_params.end(), \""; + printEscapedString(inv->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->setCallingConv("; + printCallingConv(inv->getCallingConv()); + Out << ");"; + printAttributes(inv->getAttributes(), iName); + Out << iName << "->setAttributes(" << iName << "_PAL);"; + nl(Out); + break; + } + case Instruction::Unwind: { + Out << "new UnwindInst(" + << bbname << ");"; + break; + } + case Instruction::Unreachable: { + Out << "new UnreachableInst(" + << "mod->getContext(), " + << bbname << ");"; + break; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr:{ + Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; + switch (I->getOpcode()) { + case Instruction::Add: Out << "Instruction::Add"; break; + case Instruction::FAdd: Out << "Instruction::FAdd"; break; + case Instruction::Sub: Out << "Instruction::Sub"; break; + case Instruction::FSub: Out << "Instruction::FSub"; break; + case Instruction::Mul: Out << "Instruction::Mul"; break; + case Instruction::FMul: Out << "Instruction::FMul"; break; + case Instruction::UDiv:Out << "Instruction::UDiv"; break; + case Instruction::SDiv:Out << "Instruction::SDiv"; break; + case Instruction::FDiv:Out << "Instruction::FDiv"; break; + case Instruction::URem:Out << "Instruction::URem"; break; + case Instruction::SRem:Out << "Instruction::SRem"; break; + case Instruction::FRem:Out << "Instruction::FRem"; break; + case Instruction::And: Out << "Instruction::And"; break; + case Instruction::Or: Out << "Instruction::Or"; break; + case Instruction::Xor: Out << "Instruction::Xor"; break; + case Instruction::Shl: Out << "Instruction::Shl"; break; + case Instruction::LShr:Out << "Instruction::LShr"; break; + case Instruction::AShr:Out << "Instruction::AShr"; break; + default: Out << "Instruction::BadOpCode"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::FCmp: { + Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", "; + switch (cast<FCmpInst>(I)->getPredicate()) { + case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break; + case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break; + case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break; + case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break; + case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break; + case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break; + case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break; + case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break; + case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break; + case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break; + case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break; + case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break; + case 
FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break; + case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break; + case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break; + case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break; + default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\");"; + break; + } + case Instruction::ICmp: { + Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", "; + switch (cast<ICmpInst>(I)->getPredicate()) { + case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break; + case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break; + case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break; + case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break; + case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break; + case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break; + case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break; + case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break; + case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break; + case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break; + default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\");"; + break; + } + case Instruction::Alloca: { + const AllocaInst* allocaI = cast<AllocaInst>(I); + Out << "AllocaInst* " << iName << " = new AllocaInst(" + << getCppName(allocaI->getAllocatedType()) << ", "; + if (allocaI->isArrayAllocation()) + Out << opNames[0] << ", "; + Out << "\""; + printEscapedString(allocaI->getName()); + Out << "\", " << bbname << ");"; + if (allocaI->getAlignment()) + nl(Out) << iName << "->setAlignment(" + << allocaI->getAlignment() << ");"; + break; + } + case Instruction::Load: { + const LoadInst* load = cast<LoadInst>(I); + Out << "LoadInst* " << iName << " = new LoadInst(" + << opNames[0] << ", \""; + printEscapedString(load->getName()); + Out << "\", " << (load->isVolatile() ? "true" : "false" ) + << ", " << bbname << ");"; + break; + } + case Instruction::Store: { + const StoreInst* store = cast<StoreInst>(I); + Out << " new StoreInst(" + << opNames[0] << ", " + << opNames[1] << ", " + << (store->isVolatile() ? 
"true" : "false") + << ", " << bbname << ");"; + break; + } + case Instruction::GetElementPtr: { + const GetElementPtrInst* gep = cast<GetElementPtrInst>(I); + if (gep->getNumOperands() <= 2) { + Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create(" + << opNames[0]; + if (gep->getNumOperands() == 2) + Out << ", " << opNames[1]; + } else { + Out << "std::vector<Value*> " << iName << "_indices;"; nl(Out); - for (unsigned i = 0; i < inv->getNumOperands() - 3; ++i) { - Out << iName << "_params.push_back(" + for (unsigned i = 1; i < gep->getNumOperands(); ++i ) { + Out << iName << "_indices.push_back(" << opNames[i] << ");"; nl(Out); } - Out << "InvokeInst *" << iName << " = InvokeInst::Create(" - << opNames[Ops - 3] << ", " - << opNames[Ops - 2] << ", " - << opNames[Ops - 1] << ", " - << iName << "_params.begin(), " << iName << "_params.end(), \""; - printEscapedString(inv->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->setCallingConv("; - printCallingConv(inv->getCallingConv()); - Out << ");"; - printAttributes(inv->getAttributes(), iName); - Out << iName << "->setAttributes(" << iName << "_PAL);"; + Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" + << opNames[0] << ", " << iName << "_indices.begin(), " + << iName << "_indices.end()"; + } + Out << ", \""; + printEscapedString(gep->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::PHI: { + const PHINode* phi = cast<PHINode>(I); + + Out << "PHINode* " << iName << " = PHINode::Create(" + << getCppName(phi->getType()) << ", \""; + printEscapedString(phi->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->reserveOperandSpace(" + << phi->getNumIncomingValues() + << ");"; + nl(Out); + for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { + Out << iName << "->addIncoming(" + << opNames[i] << ", " << opNames[i+1] << ");"; nl(Out); - break; - } - case Instruction::Unwind: { - Out << "new UnwindInst(" - << bbname << ");"; - break; - } - case Instruction::Unreachable: { - Out << "new UnreachableInst(" - << "mod->getContext(), " - << bbname << ");"; - break; - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr:{ - Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; - switch (I->getOpcode()) { - case Instruction::Add: Out << "Instruction::Add"; break; - case Instruction::FAdd: Out << "Instruction::FAdd"; break; - case Instruction::Sub: Out << "Instruction::Sub"; break; - case Instruction::FSub: Out << "Instruction::FSub"; break; - case Instruction::Mul: Out << "Instruction::Mul"; break; - case Instruction::FMul: Out << "Instruction::FMul"; break; - case Instruction::UDiv:Out << "Instruction::UDiv"; break; - case Instruction::SDiv:Out << "Instruction::SDiv"; break; - case Instruction::FDiv:Out << "Instruction::FDiv"; break; - case Instruction::URem:Out << "Instruction::URem"; break; - case Instruction::SRem:Out << "Instruction::SRem"; break; - case Instruction::FRem:Out << "Instruction::FRem"; break; - case Instruction::And: Out << "Instruction::And"; break; - case Instruction::Or: Out << "Instruction::Or"; break; - case 
Instruction::Xor: Out << "Instruction::Xor"; break; - case Instruction::Shl: Out << "Instruction::Shl"; break; - case Instruction::LShr:Out << "Instruction::LShr"; break; - case Instruction::AShr:Out << "Instruction::AShr"; break; - default: Out << "Instruction::BadOpCode"; break; - } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\", " << bbname << ");"; - break; } - case Instruction::FCmp: { - Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", "; - switch (cast<FCmpInst>(I)->getPredicate()) { - case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break; - case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break; - case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break; - case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break; - case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break; - case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break; - case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break; - case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break; - case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break; - case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break; - case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break; - case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break; - case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break; - case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break; - case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break; - case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break; - default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break; - } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\");"; - break; - } - case Instruction::ICmp: { - Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", "; - switch (cast<ICmpInst>(I)->getPredicate()) { - case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break; - case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break; - case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break; - case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break; - case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break; - case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break; - case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break; - case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break; - case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break; - case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break; - default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break; - } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\");"; - break; - } - case Instruction::Alloca: { - const AllocaInst* allocaI = cast<AllocaInst>(I); - Out << "AllocaInst* " << iName << " = new AllocaInst(" - << getCppName(allocaI->getAllocatedType()) << ", "; - if (allocaI->isArrayAllocation()) - Out << opNames[0] << ", "; - Out << "\""; - printEscapedString(allocaI->getName()); - Out << "\", " << bbname << ");"; - if (allocaI->getAlignment()) - nl(Out) << iName << "->setAlignment(" - << allocaI->getAlignment() << ");"; - break; - } - case Instruction::Load:{ - const LoadInst* load = cast<LoadInst>(I); - Out << "LoadInst* " << iName << " = new LoadInst(" - << opNames[0] << ", \""; - printEscapedString(load->getName()); - Out << "\", " << (load->isVolatile() ? 
"true" : "false" ) - << ", " << bbname << ");"; - break; - } - case Instruction::Store: { - const StoreInst* store = cast<StoreInst>(I); - Out << " new StoreInst(" - << opNames[0] << ", " - << opNames[1] << ", " - << (store->isVolatile() ? "true" : "false") - << ", " << bbname << ");"; - break; - } - case Instruction::GetElementPtr: { - const GetElementPtrInst* gep = cast<GetElementPtrInst>(I); - if (gep->getNumOperands() <= 2) { - Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create(" - << opNames[0]; - if (gep->getNumOperands() == 2) - Out << ", " << opNames[1]; - } else { - Out << "std::vector<Value*> " << iName << "_indices;"; - nl(Out); - for (unsigned i = 1; i < gep->getNumOperands(); ++i ) { - Out << iName << "_indices.push_back(" - << opNames[i] << ");"; - nl(Out); - } - Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" - << opNames[0] << ", " << iName << "_indices.begin(), " - << iName << "_indices.end()"; - } - Out << ", \""; - printEscapedString(gep->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::PHI: { - const PHINode* phi = cast<PHINode>(I); - - Out << "PHINode* " << iName << " = PHINode::Create(" - << getCppName(phi->getType()) << ", \""; - printEscapedString(phi->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->reserveOperandSpace(" - << phi->getNumIncomingValues() - << ");"; + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: { + const CastInst* cst = cast<CastInst>(I); + Out << "CastInst* " << iName << " = new "; + switch (I->getOpcode()) { + case Instruction::Trunc: Out << "TruncInst"; break; + case Instruction::ZExt: Out << "ZExtInst"; break; + case Instruction::SExt: Out << "SExtInst"; break; + case Instruction::FPTrunc: Out << "FPTruncInst"; break; + case Instruction::FPExt: Out << "FPExtInst"; break; + case Instruction::FPToUI: Out << "FPToUIInst"; break; + case Instruction::FPToSI: Out << "FPToSIInst"; break; + case Instruction::UIToFP: Out << "UIToFPInst"; break; + case Instruction::SIToFP: Out << "SIToFPInst"; break; + case Instruction::PtrToInt: Out << "PtrToIntInst"; break; + case Instruction::IntToPtr: Out << "IntToPtrInst"; break; + case Instruction::BitCast: Out << "BitCastInst"; break; + default: assert(!"Unreachable"); break; + } + Out << "(" << opNames[0] << ", " + << getCppName(cst->getType()) << ", \""; + printEscapedString(cst->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::Call: { + const CallInst* call = cast<CallInst>(I); + if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) { + Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get(" + << getCppName(ila->getFunctionType()) << ", \"" + << ila->getAsmString() << "\", \"" + << ila->getConstraintString() << "\"," + << (ila->hasSideEffects() ? 
"true" : "false") << ");"; nl(Out); - for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { - Out << iName << "->addIncoming(" - << opNames[i] << ", " << opNames[i+1] << ");"; - nl(Out); - } - break; - } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: { - const CastInst* cst = cast<CastInst>(I); - Out << "CastInst* " << iName << " = new "; - switch (I->getOpcode()) { - case Instruction::Trunc: Out << "TruncInst"; break; - case Instruction::ZExt: Out << "ZExtInst"; break; - case Instruction::SExt: Out << "SExtInst"; break; - case Instruction::FPTrunc: Out << "FPTruncInst"; break; - case Instruction::FPExt: Out << "FPExtInst"; break; - case Instruction::FPToUI: Out << "FPToUIInst"; break; - case Instruction::FPToSI: Out << "FPToSIInst"; break; - case Instruction::UIToFP: Out << "UIToFPInst"; break; - case Instruction::SIToFP: Out << "SIToFPInst"; break; - case Instruction::PtrToInt: Out << "PtrToIntInst"; break; - case Instruction::IntToPtr: Out << "IntToPtrInst"; break; - case Instruction::BitCast: Out << "BitCastInst"; break; - default: assert(!"Unreachable"); break; - } - Out << "(" << opNames[0] << ", " - << getCppName(cst->getType()) << ", \""; - printEscapedString(cst->getName()); - Out << "\", " << bbname << ");"; - break; } - case Instruction::Call:{ - const CallInst* call = cast<CallInst>(I); - if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) { - Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get(" - << getCppName(ila->getFunctionType()) << ", \"" - << ila->getAsmString() << "\", \"" - << ila->getConstraintString() << "\"," - << (ila->hasSideEffects() ? "true" : "false") << ");"; - nl(Out); - } - if (call->getNumOperands() > 2) { - Out << "std::vector<Value*> " << iName << "_params;"; + if (call->getNumArgOperands() > 1) { + Out << "std::vector<Value*> " << iName << "_params;"; + nl(Out); + for (unsigned i = 0; i < call->getNumArgOperands(); ++i) { + Out << iName << "_params.push_back(" << opNames[i] << ");"; nl(Out); - for (unsigned i = 1; i < call->getNumOperands(); ++i) { - Out << iName << "_params.push_back(" << opNames[i] << ");"; - nl(Out); - } - Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[0] << ", " << iName << "_params.begin(), " - << iName << "_params.end(), \""; - } else if (call->getNumOperands() == 2) { - Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[0] << ", " << opNames[1] << ", \""; - } else { - Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[0] - << ", \""; } - printEscapedString(call->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->setCallingConv("; - printCallingConv(call->getCallingConv()); - Out << ");"; - nl(Out) << iName << "->setTailCall(" - << (call->isTailCall() ? 
"true":"false"); - Out << ");"; - printAttributes(call->getAttributes(), iName); - Out << iName << "->setAttributes(" << iName << "_PAL);"; - nl(Out); - break; - } - case Instruction::Select: { - const SelectInst* sel = cast<SelectInst>(I); - Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create("; - Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \""; - printEscapedString(sel->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::UserOp1: - /// FALL THROUGH - case Instruction::UserOp2: { - /// FIXME: What should be done here? - break; - } - case Instruction::VAArg: { - const VAArgInst* va = cast<VAArgInst>(I); - Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst(" - << opNames[0] << ", " << getCppName(va->getType()) << ", \""; - printEscapedString(va->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::ExtractElement: { - const ExtractElementInst* eei = cast<ExtractElementInst>(I); - Out << "ExtractElementInst* " << getCppName(eei) - << " = new ExtractElementInst(" << opNames[0] - << ", " << opNames[1] << ", \""; - printEscapedString(eei->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::InsertElement: { - const InsertElementInst* iei = cast<InsertElementInst>(I); - Out << "InsertElementInst* " << getCppName(iei) - << " = InsertElementInst::Create(" << opNames[0] - << ", " << opNames[1] << ", " << opNames[2] << ", \""; - printEscapedString(iei->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::ShuffleVector: { - const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I); - Out << "ShuffleVectorInst* " << getCppName(svi) - << " = new ShuffleVectorInst(" << opNames[0] - << ", " << opNames[1] << ", " << opNames[2] << ", \""; - printEscapedString(svi->getName()); - Out << "\", " << bbname << ");"; - break; + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), " + << iName << "_params.end(), \""; + } else if (call->getNumArgOperands() == 1) { + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \""; + } else { + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[call->getNumArgOperands()] << ", \""; } - case Instruction::ExtractValue: { - const ExtractValueInst *evi = cast<ExtractValueInst>(I); - Out << "std::vector<unsigned> " << iName << "_indices;"; + printEscapedString(call->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->setCallingConv("; + printCallingConv(call->getCallingConv()); + Out << ");"; + nl(Out) << iName << "->setTailCall(" + << (call->isTailCall() ? "true" : "false"); + Out << ");"; + nl(Out); + printAttributes(call->getAttributes(), iName); + Out << iName << "->setAttributes(" << iName << "_PAL);"; + nl(Out); + break; + } + case Instruction::Select: { + const SelectInst* sel = cast<SelectInst>(I); + Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create("; + Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(sel->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::UserOp1: + /// FALL THROUGH + case Instruction::UserOp2: { + /// FIXME: What should be done here? 
+ break; + } + case Instruction::VAArg: { + const VAArgInst* va = cast<VAArgInst>(I); + Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst(" + << opNames[0] << ", " << getCppName(va->getType()) << ", \""; + printEscapedString(va->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ExtractElement: { + const ExtractElementInst* eei = cast<ExtractElementInst>(I); + Out << "ExtractElementInst* " << getCppName(eei) + << " = new ExtractElementInst(" << opNames[0] + << ", " << opNames[1] << ", \""; + printEscapedString(eei->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::InsertElement: { + const InsertElementInst* iei = cast<InsertElementInst>(I); + Out << "InsertElementInst* " << getCppName(iei) + << " = InsertElementInst::Create(" << opNames[0] + << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(iei->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ShuffleVector: { + const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I); + Out << "ShuffleVectorInst* " << getCppName(svi) + << " = new ShuffleVectorInst(" << opNames[0] + << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(svi->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::ExtractValue: { + const ExtractValueInst *evi = cast<ExtractValueInst>(I); + Out << "std::vector<unsigned> " << iName << "_indices;"; + nl(Out); + for (unsigned i = 0; i < evi->getNumIndices(); ++i) { + Out << iName << "_indices.push_back(" + << evi->idx_begin()[i] << ");"; nl(Out); - for (unsigned i = 0; i < evi->getNumIndices(); ++i) { - Out << iName << "_indices.push_back(" - << evi->idx_begin()[i] << ");"; - nl(Out); - } - Out << "ExtractValueInst* " << getCppName(evi) - << " = ExtractValueInst::Create(" << opNames[0] - << ", " - << iName << "_indices.begin(), " << iName << "_indices.end(), \""; - printEscapedString(evi->getName()); - Out << "\", " << bbname << ");"; - break; } - case Instruction::InsertValue: { - const InsertValueInst *ivi = cast<InsertValueInst>(I); - Out << "std::vector<unsigned> " << iName << "_indices;"; + Out << "ExtractValueInst* " << getCppName(evi) + << " = ExtractValueInst::Create(" << opNames[0] + << ", " + << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + printEscapedString(evi->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::InsertValue: { + const InsertValueInst *ivi = cast<InsertValueInst>(I); + Out << "std::vector<unsigned> " << iName << "_indices;"; + nl(Out); + for (unsigned i = 0; i < ivi->getNumIndices(); ++i) { + Out << iName << "_indices.push_back(" + << ivi->idx_begin()[i] << ");"; nl(Out); - for (unsigned i = 0; i < ivi->getNumIndices(); ++i) { - Out << iName << "_indices.push_back(" - << ivi->idx_begin()[i] << ");"; - nl(Out); - } - Out << "InsertValueInst* " << getCppName(ivi) - << " = InsertValueInst::Create(" << opNames[0] - << ", " << opNames[1] << ", " - << iName << "_indices.begin(), " << iName << "_indices.end(), \""; - printEscapedString(ivi->getName()); - Out << "\", " << bbname << ");"; - break; } + Out << "InsertValueInst* " << getCppName(ivi) + << " = InsertValueInst::Create(" << opNames[0] + << ", " << opNames[1] << ", " + << iName << "_indices.begin(), " << iName << "_indices.end(), \""; + printEscapedString(ivi->getName()); + Out << "\", " << bbname << ");"; + break; + } } DefinedValues.insert(I); nl(Out); delete [] opNames; } - // Print out the types, constants and declarations needed 
by one function - void CppWriter::printFunctionUses(const Function* F) { - nl(Out) << "// Type Definitions"; nl(Out); - if (!is_inline) { - // Print the function's return type - printType(F->getReturnType()); +// Print out the types, constants and declarations needed by one function +void CppWriter::printFunctionUses(const Function* F) { + nl(Out) << "// Type Definitions"; nl(Out); + if (!is_inline) { + // Print the function's return type + printType(F->getReturnType()); - // Print the function's function type - printType(F->getFunctionType()); + // Print the function's function type + printType(F->getFunctionType()); - // Print the types of each of the function's arguments - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - printType(AI->getType()); - } + // Print the types of each of the function's arguments + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + printType(AI->getType()); } + } - // Print type definitions for every type referenced by an instruction and - // make a note of any global values or constants that are referenced - SmallPtrSet<GlobalValue*,64> gvs; - SmallPtrSet<Constant*,64> consts; - for (Function::const_iterator BB = F->begin(), BE = F->end(); - BB != BE; ++BB){ - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); - I != E; ++I) { - // Print the type of the instruction itself - printType(I->getType()); + // Print type definitions for every type referenced by an instruction and + // make a note of any global values or constants that are referenced + SmallPtrSet<GlobalValue*,64> gvs; + SmallPtrSet<Constant*,64> consts; + for (Function::const_iterator BB = F->begin(), BE = F->end(); + BB != BE; ++BB){ + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + // Print the type of the instruction itself + printType(I->getType()); - // Print the type of each of the instruction's operands - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - Value* operand = I->getOperand(i); - printType(operand->getType()); - - // If the operand references a GVal or Constant, make a note of it - if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) { - gvs.insert(GV); - if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - if (GVar->hasInitializer()) - consts.insert(GVar->getInitializer()); - } else if (Constant* C = dyn_cast<Constant>(operand)) - consts.insert(C); - } + // Print the type of each of the instruction's operands + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + Value* operand = I->getOperand(i); + printType(operand->getType()); + + // If the operand references a GVal or Constant, make a note of it + if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) { + gvs.insert(GV); + if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + if (GVar->hasInitializer()) + consts.insert(GVar->getInitializer()); + } else if (Constant* C = dyn_cast<Constant>(operand)) + consts.insert(C); } } + } - // Print the function declarations for any functions encountered - nl(Out) << "// Function Declarations"; nl(Out); - for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (Function* Fun = dyn_cast<Function>(*I)) { - if (!is_inline || Fun != F) - printFunctionHead(Fun); - } + // Print the function declarations for any functions encountered + nl(Out) << "// Function Declarations"; nl(Out); + for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); + I != E; ++I) { + if (Function* Fun = 
dyn_cast<Function>(*I)) { + if (!is_inline || Fun != F) + printFunctionHead(Fun); } + } - // Print the global variable declarations for any variables encountered - nl(Out) << "// Global Variable Declarations"; nl(Out); - for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I)) - printVariableHead(F); - } + // Print the global variable declarations for any variables encountered + nl(Out) << "// Global Variable Declarations"; nl(Out); + for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); + I != E; ++I) { + if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I)) + printVariableHead(F); + } - // Print the constants found - nl(Out) << "// Constant Definitions"; nl(Out); - for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(), - E = consts.end(); I != E; ++I) { - printConstant(*I); - } +// Print the constants found + nl(Out) << "// Constant Definitions"; nl(Out); + for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(), + E = consts.end(); I != E; ++I) { + printConstant(*I); + } - // Process the global variables definitions now that all the constants have - // been emitted. These definitions just couple the gvars with their constant - // initializers. - nl(Out) << "// Global Variable Definitions"; nl(Out); - for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); - I != E; ++I) { - if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I)) - printVariableBody(GV); - } + // Process the global variables definitions now that all the constants have + // been emitted. These definitions just couple the gvars with their constant + // initializers. + nl(Out) << "// Global Variable Definitions"; nl(Out); + for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end(); + I != E; ++I) { + if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I)) + printVariableBody(GV); } +} - void CppWriter::printFunctionHead(const Function* F) { - nl(Out) << "Function* " << getCppName(F); - if (is_inline) { - Out << " = mod->getFunction(\""; - printEscapedString(F->getName()); - Out << "\", " << getCppName(F->getFunctionType()) << ");"; - nl(Out) << "if (!" << getCppName(F) << ") {"; - nl(Out) << getCppName(F); - } - Out<< " = Function::Create("; - nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ","; - nl(Out) << "/*Linkage=*/"; - printLinkageType(F->getLinkage()); - Out << ","; - nl(Out) << "/*Name=*/\""; +void CppWriter::printFunctionHead(const Function* F) { + nl(Out) << "Function* " << getCppName(F); + if (is_inline) { + Out << " = mod->getFunction(\""; printEscapedString(F->getName()); - Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : ""); - nl(Out,-1); + Out << "\", " << getCppName(F->getFunctionType()) << ");"; + nl(Out) << "if (!" << getCppName(F) << ") {"; + nl(Out) << getCppName(F); + } + Out<< " = Function::Create("; + nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ","; + nl(Out) << "/*Linkage=*/"; + printLinkageType(F->getLinkage()); + Out << ","; + nl(Out) << "/*Name=*/\""; + printEscapedString(F->getName()); + Out << "\", mod); " << (F->isDeclaration()? 
"// (external, no body)" : ""); + nl(Out,-1); + printCppName(F); + Out << "->setCallingConv("; + printCallingConv(F->getCallingConv()); + Out << ");"; + nl(Out); + if (F->hasSection()) { + printCppName(F); + Out << "->setSection(\"" << F->getSection() << "\");"; + nl(Out); + } + if (F->getAlignment()) { + printCppName(F); + Out << "->setAlignment(" << F->getAlignment() << ");"; + nl(Out); + } + if (F->getVisibility() != GlobalValue::DefaultVisibility) { printCppName(F); - Out << "->setCallingConv("; - printCallingConv(F->getCallingConv()); + Out << "->setVisibility("; + printVisibilityType(F->getVisibility()); Out << ");"; nl(Out); - if (F->hasSection()) { - printCppName(F); - Out << "->setSection(\"" << F->getSection() << "\");"; - nl(Out); - } - if (F->getAlignment()) { - printCppName(F); - Out << "->setAlignment(" << F->getAlignment() << ");"; - nl(Out); - } - if (F->getVisibility() != GlobalValue::DefaultVisibility) { - printCppName(F); - Out << "->setVisibility("; - printVisibilityType(F->getVisibility()); - Out << ");"; - nl(Out); - } - if (F->hasGC()) { - printCppName(F); - Out << "->setGC(\"" << F->getGC() << "\");"; - nl(Out); - } - if (is_inline) { - Out << "}"; - nl(Out); - } - printAttributes(F->getAttributes(), getCppName(F)); + } + if (F->hasGC()) { printCppName(F); - Out << "->setAttributes(" << getCppName(F) << "_PAL);"; + Out << "->setGC(\"" << F->getGC() << "\");"; nl(Out); } + if (is_inline) { + Out << "}"; + nl(Out); + } + printAttributes(F->getAttributes(), getCppName(F)); + printCppName(F); + Out << "->setAttributes(" << getCppName(F) << "_PAL);"; + nl(Out); +} - void CppWriter::printFunctionBody(const Function *F) { - if (F->isDeclaration()) - return; // external functions have no bodies. - - // Clear the DefinedValues and ForwardRefs maps because we can't have - // cross-function forward refs - ForwardRefs.clear(); - DefinedValues.clear(); +void CppWriter::printFunctionBody(const Function *F) { + if (F->isDeclaration()) + return; // external functions have no bodies. - // Create all the argument values - if (!is_inline) { - if (!F->arg_empty()) { - Out << "Function::arg_iterator args = " << getCppName(F) - << "->arg_begin();"; - nl(Out); - } - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - Out << "Value* " << getCppName(AI) << " = args++;"; - nl(Out); - if (AI->hasName()) { - Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");"; - nl(Out); - } - } - } + // Clear the DefinedValues and ForwardRefs maps because we can't have + // cross-function forward refs + ForwardRefs.clear(); + DefinedValues.clear(); - // Create all the basic blocks - nl(Out); - for (Function::const_iterator BI = F->begin(), BE = F->end(); - BI != BE; ++BI) { - std::string bbname(getCppName(BI)); - Out << "BasicBlock* " << bbname << - " = BasicBlock::Create(mod->getContext(), \""; - if (BI->hasName()) - printEscapedString(BI->getName()); - Out << "\"," << getCppName(BI->getParent()) << ",0);"; + // Create all the argument values + if (!is_inline) { + if (!F->arg_empty()) { + Out << "Function::arg_iterator args = " << getCppName(F) + << "->arg_begin();"; nl(Out); } - - // Output all of its basic blocks... 
for the function - for (Function::const_iterator BI = F->begin(), BE = F->end(); - BI != BE; ++BI) { - std::string bbname(getCppName(BI)); - nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")"; + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + Out << "Value* " << getCppName(AI) << " = args++;"; nl(Out); - - // Output all of the instructions in the basic block... - for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); - I != E; ++I) { - printInstruction(I,bbname); + if (AI->hasName()) { + Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");"; + nl(Out); } } + } - // Loop over the ForwardRefs and resolve them now that all instructions - // are generated. - if (!ForwardRefs.empty()) { - nl(Out) << "// Resolve Forward References"; - nl(Out); - } + // Create all the basic blocks + nl(Out); + for (Function::const_iterator BI = F->begin(), BE = F->end(); + BI != BE; ++BI) { + std::string bbname(getCppName(BI)); + Out << "BasicBlock* " << bbname << + " = BasicBlock::Create(mod->getContext(), \""; + if (BI->hasName()) + printEscapedString(BI->getName()); + Out << "\"," << getCppName(BI->getParent()) << ",0);"; + nl(Out); + } - while (!ForwardRefs.empty()) { - ForwardRefMap::iterator I = ForwardRefs.begin(); - Out << I->second << "->replaceAllUsesWith(" - << getCppName(I->first) << "); delete " << I->second << ";"; - nl(Out); - ForwardRefs.erase(I); + // Output all of its basic blocks... for the function + for (Function::const_iterator BI = F->begin(), BE = F->end(); + BI != BE; ++BI) { + std::string bbname(getCppName(BI)); + nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")"; + nl(Out); + + // Output all of the instructions in the basic block... + for (BasicBlock::const_iterator I = BI->begin(), E = BI->end(); + I != E; ++I) { + printInstruction(I,bbname); } } - void CppWriter::printInline(const std::string& fname, - const std::string& func) { - const Function* F = TheModule->getFunction(func); - if (!F) { - error(std::string("Function '") + func + "' not found in input module"); - return; - } - if (F->isDeclaration()) { - error(std::string("Function '") + func + "' is external!"); - return; - } - nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *" - << getCppName(F); - unsigned arg_count = 1; - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - Out << ", Value* arg_" << arg_count; - } - Out << ") {"; + // Loop over the ForwardRefs and resolve them now that all instructions + // are generated. + if (!ForwardRefs.empty()) { + nl(Out) << "// Resolve Forward References"; nl(Out); - is_inline = true; - printFunctionUses(F); - printFunctionBody(F); - is_inline = false; - Out << "return " << getCppName(F->begin()) << ";"; - nl(Out) << "}"; + } + + while (!ForwardRefs.empty()) { + ForwardRefMap::iterator I = ForwardRefs.begin(); + Out << I->second << "->replaceAllUsesWith(" + << getCppName(I->first) << "); delete " << I->second << ";"; nl(Out); + ForwardRefs.erase(I); } +} - void CppWriter::printModuleBody() { - // Print out all the type definitions - nl(Out) << "// Type Definitions"; nl(Out); - printTypes(TheModule); - - // Functions can call each other and global variables can reference them so - // define all the functions first before emitting their function bodies. 
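For orientation, the ForwardRefs bookkeeping moved around in this hunk implements a two-pass scheme: printFunctionBody emits instructions in source order, any use of a value defined later gets a placeholder recorded in ForwardRefs, and the resolution loop above patches each placeholder once every definition exists. A sketch of the generated pattern, assuming (hypothetically) an Argument placeholder and an instruction inst_x defined later:

    // Emitted while %x was still undefined: a throwaway Value of the right type.
    Argument* fwdref_x = new Argument(IntegerType::get(mod->getContext(), 32));
    // ... instructions are created with fwdref_x standing in for %x ...
    // Emitted by the resolution loop once inst_x exists:
    fwdref_x->replaceAllUsesWith(inst_x);
    delete fwdref_x;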
- nl(Out) << "// Function Declarations"; nl(Out); - for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); - I != E; ++I) - printFunctionHead(I); - - // Process the global variables declarations. We can't initialze them until - // after the constants are printed so just print a header for each global - nl(Out) << "// Global Variable Declarations\n"; nl(Out); - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) { - printVariableHead(I); - } +void CppWriter::printInline(const std::string& fname, + const std::string& func) { + const Function* F = TheModule->getFunction(func); + if (!F) { + error(std::string("Function '") + func + "' not found in input module"); + return; + } + if (F->isDeclaration()) { + error(std::string("Function '") + func + "' is external!"); + return; + } + nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *" + << getCppName(F); + unsigned arg_count = 1; + for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + AI != AE; ++AI) { + Out << ", Value* arg_" << arg_count; + } + Out << ") {"; + nl(Out); + is_inline = true; + printFunctionUses(F); + printFunctionBody(F); + is_inline = false; + Out << "return " << getCppName(F->begin()) << ";"; + nl(Out) << "}"; + nl(Out); +} - // Print out all the constants definitions. Constants don't recurse except - // through GlobalValues. All GlobalValues have been declared at this point - // so we can proceed to generate the constants. - nl(Out) << "// Constant Definitions"; nl(Out); - printConstants(TheModule); - - // Process the global variables definitions now that all the constants have - // been emitted. These definitions just couple the gvars with their constant - // initializers. - nl(Out) << "// Global Variable Definitions"; nl(Out); - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) { - printVariableBody(I); - } +void CppWriter::printModuleBody() { + // Print out all the type definitions + nl(Out) << "// Type Definitions"; nl(Out); + printTypes(TheModule); + + // Functions can call each other and global variables can reference them so + // define all the functions first before emitting their function bodies. + nl(Out) << "// Function Declarations"; nl(Out); + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) + printFunctionHead(I); + + // Process the global variables declarations. We can't initialze them until + // after the constants are printed so just print a header for each global + nl(Out) << "// Global Variable Declarations\n"; nl(Out); + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + printVariableHead(I); + } - // Finally, we can safely put out all of the function bodies. - nl(Out) << "// Function Definitions"; nl(Out); - for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); - I != E; ++I) { - if (!I->isDeclaration()) { - nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I) - << ")"; - nl(Out) << "{"; - nl(Out,1); - printFunctionBody(I); - nl(Out,-1) << "}"; - nl(Out); - } - } + // Print out all the constants definitions. Constants don't recurse except + // through GlobalValues. All GlobalValues have been declared at this point + // so we can proceed to generate the constants. 
+ nl(Out) << "// Constant Definitions"; nl(Out); + printConstants(TheModule); + + // Process the global variables definitions now that all the constants have + // been emitted. These definitions just couple the gvars with their constant + // initializers. + nl(Out) << "// Global Variable Definitions"; nl(Out); + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + printVariableBody(I); } - void CppWriter::printProgram(const std::string& fname, - const std::string& mName) { - Out << "#include <llvm/LLVMContext.h>\n"; - Out << "#include <llvm/Module.h>\n"; - Out << "#include <llvm/DerivedTypes.h>\n"; - Out << "#include <llvm/Constants.h>\n"; - Out << "#include <llvm/GlobalVariable.h>\n"; - Out << "#include <llvm/Function.h>\n"; - Out << "#include <llvm/CallingConv.h>\n"; - Out << "#include <llvm/BasicBlock.h>\n"; - Out << "#include <llvm/Instructions.h>\n"; - Out << "#include <llvm/InlineAsm.h>\n"; - Out << "#include <llvm/Support/FormattedStream.h>\n"; - Out << "#include <llvm/Support/MathExtras.h>\n"; - Out << "#include <llvm/Pass.h>\n"; - Out << "#include <llvm/PassManager.h>\n"; - Out << "#include <llvm/ADT/SmallVector.h>\n"; - Out << "#include <llvm/Analysis/Verifier.h>\n"; - Out << "#include <llvm/Assembly/PrintModulePass.h>\n"; - Out << "#include <algorithm>\n"; - Out << "using namespace llvm;\n\n"; - Out << "Module* " << fname << "();\n\n"; - Out << "int main(int argc, char**argv) {\n"; - Out << " Module* Mod = " << fname << "();\n"; - Out << " verifyModule(*Mod, PrintMessageAction);\n"; - Out << " PassManager PM;\n"; - Out << " PM.add(createPrintModulePass(&outs()));\n"; - Out << " PM.run(*Mod);\n"; - Out << " return 0;\n"; - Out << "}\n\n"; - printModule(fname,mName); - } - - void CppWriter::printModule(const std::string& fname, - const std::string& mName) { - nl(Out) << "Module* " << fname << "() {"; - nl(Out,1) << "// Module Construction"; - nl(Out) << "Module* mod = new Module(\""; - printEscapedString(mName); - Out << "\", getGlobalContext());"; - if (!TheModule->getTargetTriple().empty()) { - nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");"; - } - if (!TheModule->getTargetTriple().empty()) { - nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple() - << "\");"; + // Finally, we can safely put out all of the function bodies. 
+ nl(Out) << "// Function Definitions"; nl(Out); + for (Module::const_iterator I = TheModule->begin(), E = TheModule->end(); + I != E; ++I) { + if (!I->isDeclaration()) { + nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I) + << ")"; + nl(Out) << "{"; + nl(Out,1); + printFunctionBody(I); + nl(Out,-1) << "}"; + nl(Out); } + } +} - if (!TheModule->getModuleInlineAsm().empty()) { - nl(Out) << "mod->setModuleInlineAsm(\""; - printEscapedString(TheModule->getModuleInlineAsm()); - Out << "\");"; - } - nl(Out); +void CppWriter::printProgram(const std::string& fname, + const std::string& mName) { + Out << "#include <llvm/LLVMContext.h>\n"; + Out << "#include <llvm/Module.h>\n"; + Out << "#include <llvm/DerivedTypes.h>\n"; + Out << "#include <llvm/Constants.h>\n"; + Out << "#include <llvm/GlobalVariable.h>\n"; + Out << "#include <llvm/Function.h>\n"; + Out << "#include <llvm/CallingConv.h>\n"; + Out << "#include <llvm/BasicBlock.h>\n"; + Out << "#include <llvm/Instructions.h>\n"; + Out << "#include <llvm/InlineAsm.h>\n"; + Out << "#include <llvm/Support/FormattedStream.h>\n"; + Out << "#include <llvm/Support/MathExtras.h>\n"; + Out << "#include <llvm/Pass.h>\n"; + Out << "#include <llvm/PassManager.h>\n"; + Out << "#include <llvm/ADT/SmallVector.h>\n"; + Out << "#include <llvm/Analysis/Verifier.h>\n"; + Out << "#include <llvm/Assembly/PrintModulePass.h>\n"; + Out << "#include <algorithm>\n"; + Out << "using namespace llvm;\n\n"; + Out << "Module* " << fname << "();\n\n"; + Out << "int main(int argc, char**argv) {\n"; + Out << " Module* Mod = " << fname << "();\n"; + Out << " verifyModule(*Mod, PrintMessageAction);\n"; + Out << " PassManager PM;\n"; + Out << " PM.add(createPrintModulePass(&outs()));\n"; + Out << " PM.run(*Mod);\n"; + Out << " return 0;\n"; + Out << "}\n\n"; + printModule(fname,mName); +} - // Loop over the dependent libraries and emit them. - Module::lib_iterator LI = TheModule->lib_begin(); - Module::lib_iterator LE = TheModule->lib_end(); - while (LI != LE) { - Out << "mod->addLibrary(\"" << *LI << "\");"; - nl(Out); - ++LI; - } - printModuleBody(); - nl(Out) << "return mod;"; - nl(Out,-1) << "}"; +void CppWriter::printModule(const std::string& fname, + const std::string& mName) { + nl(Out) << "Module* " << fname << "() {"; + nl(Out,1) << "// Module Construction"; + nl(Out) << "Module* mod = new Module(\""; + printEscapedString(mName); + Out << "\", getGlobalContext());"; + if (!TheModule->getTargetTriple().empty()) { + nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");"; + } + if (!TheModule->getTargetTriple().empty()) { + nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple() + << "\");"; + } + + if (!TheModule->getModuleInlineAsm().empty()) { + nl(Out) << "mod->setModuleInlineAsm(\""; + printEscapedString(TheModule->getModuleInlineAsm()); + Out << "\");"; + } + nl(Out); + + // Loop over the dependent libraries and emit them. 
+ Module::lib_iterator LI = TheModule->lib_begin(); + Module::lib_iterator LE = TheModule->lib_end(); + while (LI != LE) { + Out << "mod->addLibrary(\"" << *LI << "\");"; nl(Out); + ++LI; } + printModuleBody(); + nl(Out) << "return mod;"; + nl(Out,-1) << "}"; + nl(Out); +} + +void CppWriter::printContents(const std::string& fname, + const std::string& mName) { + Out << "\nModule* " << fname << "(Module *mod) {\n"; + Out << "\nmod->setModuleIdentifier(\""; + printEscapedString(mName); + Out << "\");\n"; + printModuleBody(); + Out << "\nreturn mod;\n"; + Out << "\n}\n"; +} - void CppWriter::printContents(const std::string& fname, - const std::string& mName) { - Out << "\nModule* " << fname << "(Module *mod) {\n"; - Out << "\nmod->setModuleIdentifier(\""; - printEscapedString(mName); - Out << "\");\n"; - printModuleBody(); - Out << "\nreturn mod;\n"; - Out << "\n}\n"; +void CppWriter::printFunction(const std::string& fname, + const std::string& funcName) { + const Function* F = TheModule->getFunction(funcName); + if (!F) { + error(std::string("Function '") + funcName + "' not found in input module"); + return; } + Out << "\nFunction* " << fname << "(Module *mod) {\n"; + printFunctionUses(F); + printFunctionHead(F); + printFunctionBody(F); + Out << "return " << getCppName(F) << ";\n"; + Out << "}\n"; +} - void CppWriter::printFunction(const std::string& fname, - const std::string& funcName) { - const Function* F = TheModule->getFunction(funcName); - if (!F) { - error(std::string("Function '") + funcName + "' not found in input module"); - return; - } - Out << "\nFunction* " << fname << "(Module *mod) {\n"; - printFunctionUses(F); - printFunctionHead(F); - printFunctionBody(F); - Out << "return " << getCppName(F) << ";\n"; - Out << "}\n"; - } - - void CppWriter::printFunctions() { - const Module::FunctionListType &funcs = TheModule->getFunctionList(); - Module::const_iterator I = funcs.begin(); - Module::const_iterator IE = funcs.end(); - - for (; I != IE; ++I) { - const Function &func = *I; - if (!func.isDeclaration()) { - std::string name("define_"); - name += func.getName(); - printFunction(name, func.getName()); - } +void CppWriter::printFunctions() { + const Module::FunctionListType &funcs = TheModule->getFunctionList(); + Module::const_iterator I = funcs.begin(); + Module::const_iterator IE = funcs.end(); + + for (; I != IE; ++I) { + const Function &func = *I; + if (!func.isDeclaration()) { + std::string name("define_"); + name += func.getName(); + printFunction(name, func.getName()); } } +} - void CppWriter::printVariable(const std::string& fname, - const std::string& varName) { - const GlobalVariable* GV = TheModule->getNamedGlobal(varName); +void CppWriter::printVariable(const std::string& fname, + const std::string& varName) { + const GlobalVariable* GV = TheModule->getNamedGlobal(varName); - if (!GV) { - error(std::string("Variable '") + varName + "' not found in input module"); - return; - } - Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n"; - printVariableUses(GV); - printVariableHead(GV); - printVariableBody(GV); - Out << "return " << getCppName(GV) << ";\n"; - Out << "}\n"; - } - - void CppWriter::printType(const std::string& fname, - const std::string& typeName) { - const Type* Ty = TheModule->getTypeByName(typeName); - if (!Ty) { - error(std::string("Type '") + typeName + "' not found in input module"); - return; - } - Out << "\nType* " << fname << "(Module *mod) {\n"; - printType(Ty); - Out << "return " << getCppName(Ty) << ";\n"; - Out << "}\n"; - } - - bool 
CppWriter::runOnModule(Module &M) { - TheModule = &M; - - // Emit a header - Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n"; - - // Get the name of the function we're supposed to generate - std::string fname = FuncName.getValue(); - - // Get the name of the thing we are to generate - std::string tgtname = NameToGenerate.getValue(); - if (GenerationType == GenModule || - GenerationType == GenContents || - GenerationType == GenProgram || - GenerationType == GenFunctions) { - if (tgtname == "!bad!") { - if (M.getModuleIdentifier() == "-") - tgtname = "<stdin>"; - else - tgtname = M.getModuleIdentifier(); - } - } else if (tgtname == "!bad!") - error("You must use the -for option with -gen-{function,variable,type}"); - - switch (WhatToGenerate(GenerationType)) { - case GenProgram: - if (fname.empty()) - fname = "makeLLVMModule"; - printProgram(fname,tgtname); - break; - case GenModule: - if (fname.empty()) - fname = "makeLLVMModule"; - printModule(fname,tgtname); - break; - case GenContents: - if (fname.empty()) - fname = "makeLLVMModuleContents"; - printContents(fname,tgtname); - break; - case GenFunction: - if (fname.empty()) - fname = "makeLLVMFunction"; - printFunction(fname,tgtname); - break; - case GenFunctions: - printFunctions(); - break; - case GenInline: - if (fname.empty()) - fname = "makeLLVMInline"; - printInline(fname,tgtname); - break; - case GenVariable: - if (fname.empty()) - fname = "makeLLVMVariable"; - printVariable(fname,tgtname); - break; - case GenType: - if (fname.empty()) - fname = "makeLLVMType"; - printType(fname,tgtname); - break; - default: - error("Invalid generation option"); - } + if (!GV) { + error(std::string("Variable '") + varName + "' not found in input module"); + return; + } + Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n"; + printVariableUses(GV); + printVariableHead(GV); + printVariableBody(GV); + Out << "return " << getCppName(GV) << ";\n"; + Out << "}\n"; +} - return false; +void CppWriter::printType(const std::string& fname, + const std::string& typeName) { + const Type* Ty = TheModule->getTypeByName(typeName); + if (!Ty) { + error(std::string("Type '") + typeName + "' not found in input module"); + return; } + Out << "\nType* " << fname << "(Module *mod) {\n"; + printType(Ty); + Out << "return " << getCppName(Ty) << ";\n"; + Out << "}\n"; +} + +bool CppWriter::runOnModule(Module &M) { + TheModule = &M; + + // Emit a header + Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n"; + + // Get the name of the function we're supposed to generate + std::string fname = FuncName.getValue(); + + // Get the name of the thing we are to generate + std::string tgtname = NameToGenerate.getValue(); + if (GenerationType == GenModule || + GenerationType == GenContents || + GenerationType == GenProgram || + GenerationType == GenFunctions) { + if (tgtname == "!bad!") { + if (M.getModuleIdentifier() == "-") + tgtname = "<stdin>"; + else + tgtname = M.getModuleIdentifier(); + } + } else if (tgtname == "!bad!") + error("You must use the -for option with -gen-{function,variable,type}"); + + switch (WhatToGenerate(GenerationType)) { + case GenProgram: + if (fname.empty()) + fname = "makeLLVMModule"; + printProgram(fname,tgtname); + break; + case GenModule: + if (fname.empty()) + fname = "makeLLVMModule"; + printModule(fname,tgtname); + break; + case GenContents: + if (fname.empty()) + fname = "makeLLVMModuleContents"; + printContents(fname,tgtname); + break; + case GenFunction: + if (fname.empty()) + fname = "makeLLVMFunction"; + 
printFunction(fname,tgtname); + break; + case GenFunctions: + printFunctions(); + break; + case GenInline: + if (fname.empty()) + fname = "makeLLVMInline"; + printInline(fname,tgtname); + break; + case GenVariable: + if (fname.empty()) + fname = "makeLLVMVariable"; + printVariable(fname,tgtname); + break; + case GenType: + if (fname.empty()) + fname = "makeLLVMType"; + printType(fname,tgtname); + break; + default: + error("Invalid generation option"); + } + + return false; } char CppWriter::ID = 0; diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp index e42e9b3505d8..b6e4d654f4aa 100644 --- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp @@ -145,8 +145,9 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { const MachineFrameInfo *MFI = MF->getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg()); - if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass) + unsigned Reg = CSI[i].getReg(); + unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg); + if (MBlaze::CPURegsRegisterClass->contains(Reg)) CPUBitmask |= (1 << RegNum); } diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 23889b128ffe..1730b689d361 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -234,6 +234,24 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineRegisterInfo &R = F->getRegInfo(); MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, loop); + F->insert(It, finish); + + // Update machine-CFG edges by transferring all successors and + remaining instructions from the current block to the new block which + will contain the Phi node for the select. + finish->splice(finish->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + finish->transferSuccessorsAndUpdatePHIs(BB); + + // Add the true and fallthrough blocks as its successors. + BB->addSuccessor(loop); + BB->addSuccessor(finish); + + // Next, add the finish block as a successor of the loop block + loop->addSuccessor(finish); + loop->addSuccessor(loop) unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT) @@ -249,26 +267,6 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addReg(IAMT) .addMBB(finish); - F->insert(It, loop); - F->insert(It, finish); - - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) - finish->addSuccessor(*i); - - // Next, remove all successors of the current block, and add the true - and fallthrough blocks as its successors.
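The MBlazeISelLowering hunk above replaces the hand-rolled successor copying with the newer custom-inserter idiom used across targets in this commit: splice everything after the pseudo instruction into the sink block and let transferSuccessorsAndUpdatePHIs migrate the CFG edges and rewrite PHIs. A condensed sketch, with SinkMBB standing in for the finish/dneBB blocks of the real code:

    MachineBasicBlock *BB = MI->getParent();
    MachineFunction *F = BB->getParent();
    MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(BB->getBasicBlock());
    F->insert(llvm::next(MachineFunction::iterator(BB)), SinkMBB);
    // Move every instruction after MI, then hand BB's successor list to
    // SinkMBB and fix up PHIs in those successors to name SinkMBB.
    SinkMBB->splice(SinkMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
    SinkMBB->transferSuccessorsAndUpdatePHIs(BB);
    BB->addSuccessor(SinkMBB);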
- while(!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); - BB->addSuccessor(loop); - BB->addSuccessor(finish); - - // Next, add the finish block as a successor of the loop block - loop->addSuccessor(finish); - loop->addSuccessor(loop); - unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass); BuildMI(loop, dl, TII->get(MBlaze::PHI), DST) @@ -298,12 +296,13 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, .addReg(NAMT) .addMBB(loop); - BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + BuildMI(*finish, finish->begin(), dl, + TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) .addReg(IVAL).addMBB(BB) .addReg(NDST).addMBB(loop); // The pseudo instruction is no longer needed so remove it - F->DeleteMachineInstr(MI); + MI->eraseFromParent(); return finish; } @@ -338,27 +337,23 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case MBlazeCC::LE: Opc = MBlaze::BGTID; break; } - BuildMI(BB, dl, TII->get(Opc)) - .addReg(MI->getOperand(3).getReg()) - .addMBB(dneBB); - F->insert(It, flsBB); F->insert(It, dneBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) - dneBB->addSuccessor(*i); + // Transfer the remainder of BB and its successor edges to dneBB. + dneBB->splice(dneBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + dneBB->transferSuccessorsAndUpdatePHIs(BB); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while(!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); BB->addSuccessor(flsBB); BB->addSuccessor(dneBB); flsBB->addSuccessor(dneBB); + BuildMI(BB, dl, TII->get(Opc)) + .addReg(MI->getOperand(3).getReg()) + .addMBB(dneBB); + // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... @@ -366,11 +361,12 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // .addReg(MI->getOperand(1).getReg()).addMBB(flsBB) // .addReg(MI->getOperand(2).getReg()).addMBB(BB); - BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) + BuildMI(*dneBB, dneBB->begin(), dl, + TII->get(MBlaze::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(flsBB) .addReg(MI->getOperand(1).getReg()).addMBB(BB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. 
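Two smaller migrations recur in the hunk above. PHIs are now created with the BuildMI overload that takes an explicit insertion point, so they stay at the top of the spliced block, and pseudo instructions are removed with eraseFromParent() rather than F->DeleteMachineInstr(). A sketch with illustrative register and block names:

    // Insert the PHI at the start of SinkMBB instead of appending at the end.
    BuildMI(*SinkMBB, SinkMBB->begin(), dl, TII->get(MBlaze::PHI), DstReg)
        .addReg(TrueReg).addMBB(TrueMBB)
        .addReg(FalseReg).addMBB(FalseMBB);
    // Unlink and delete the lowered pseudo in one call.
    MI->eraseFromParent();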
return dneBB; } } @@ -408,7 +404,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA); } @@ -439,10 +435,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { SDValue MBlazeTargetLowering:: LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue ResNode; - EVT PtrVT = Op.getValueType(); ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); const Constant *C = N->getConstVal(); - SDValue Zero = DAG.getConstant(0, PtrVT); DebugLoc dl = Op.getDebugLoc(); SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), @@ -531,6 +525,7 @@ SDValue MBlazeTargetLowering:: LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -562,7 +557,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -590,7 +585,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, // Create the frame index object for this incoming parameter LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset()); int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, - LastArgStackLoc, true, false); + LastArgStackLoc, true); SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); @@ -623,7 +618,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, // node so that legalize doesn't hack it. unsigned char OpFlag = MBlazeII::MO_NO_FLAG; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(S->getSymbol(), @@ -779,7 +774,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // offset on PEI::calculateFrameObjectOffsets. // Arguments are always 32-bit. 
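The LowerCall/LowerReturn signature changes in this hunk follow an interface split applied to every target in this commit: ISD::OutputArg no longer carries the value itself, so argument flags stay in Outs while the SDValues arrive through the parallel OutVals array. The resulting access pattern, sketched:

    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
      ISD::ArgFlagsTy Flags = Outs[i].Flags;  // flags, as before
      SDValue Arg = OutVals[i];               // value: previously Outs[i].Val
      // ... promote or store Arg according to ArgLocs[i] ...
    }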
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, 0, true, false); + int FI = MFI->CreateFixedObject(ArgSize, 0, true); MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+ (FirstStackArgLoc + VA.getLocMemOffset()))); @@ -810,7 +805,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, unsigned LiveReg = MF.addLiveIn(Reg, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32); - int FI = MFI->CreateFixedObject(4, 0, true, false); + int FI = MFI->CreateFixedObject(4, 0, true); MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, @@ -841,6 +836,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, SDValue MBlazeTargetLowering:: LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of // the return value to a location @@ -869,7 +865,7 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h index 9f9ac899c6fc..5ec2563c555c 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.h +++ b/lib/Target/MBlaze/MBlazeISelLowering.h @@ -109,6 +109,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -117,6 +118,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual MachineBasicBlock * diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index 4c4d86bd206d..6ff5825a26b6 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -110,15 +110,13 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { BuildMI(MBB, MI, DL, get(MBlaze::NOP)); } -bool MBlazeInstrInfo:: -copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { +void MBlazeInstrInfo:: +copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg) - .addReg(SrcReg).addReg(MBlaze::R0); - return true; + .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0); } void MBlazeInstrInfo:: @@ -141,54 +139,17 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(0).addFrameIndex(FI); } -MachineInstr *MBlazeInstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - MachineInstr *NewMI = NULL; - - 
switch (MI->getOpcode()) { - case MBlaze::OR: - case MBlaze::ADD: - if ((MI->getOperand(0).isReg()) && - (MI->getOperand(2).isReg()) && - (MI->getOperand(2).getReg() == MBlaze::R0) && - (MI->getOperand(1).isReg())) { - if (Ops[0] == 0) { // COPY -> STORE - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } else { // COPY -> LOAD - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } - } - break; - } - - return NewMI; -} - //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// unsigned MBlazeInstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Can only insert uncond branches so far. assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); - BuildMI(&MBB, DebugLoc(), get(MBlaze::BRI)).addMBB(TBB); + BuildMI(&MBB, DL, get(MBlaze::BRI)).addMBB(TBB); return 1; } @@ -209,12 +170,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20, - MBlaze::CPURegsRegisterClass, - MBlaze::CPURegsRegisterClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global base register!"); - Ok = Ok; // Silence warning when assertions are turned off. 
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalBaseReg).addReg(MBlaze::R20); RegInfo.addLiveIn(MBlaze::R20); MBlazeFI->setGlobalBaseReg(GlobalBaseReg); diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index c9fdc8877d68..f0743705f010 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -198,13 +198,12 @@ public: /// Branch Analysis virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -217,18 +216,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - /// Insert nop instruction when hazard condition is found virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index f15eea9507d6..8cafa8c519c6 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -148,22 +148,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -/// MBlaze Callee Saved Register Classes -const TargetRegisterClass* const* MBlazeRegisterInfo:: -getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRC[] = { - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass, - 0 - }; - - return CalleeSavedRC; -} - BitVector MBlazeRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index b618bf4cf4c5..af97b0e2d79e 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -54,9 +54,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction* MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index 3de173cc26ce..8f97d255077f 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -808,7 +808,7 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) { std::string Name; switch (Inst->getIntrinsicID()) { case Intrinsic::vastart: - Name = getValueName(Inst->getOperand(1)); + Name = getValueName(Inst->getArgOperand(0)); Name.insert(Name.length()-1,"$valist"); // Obtain the argument handle. printSimpleInstruction("ldloca",Name.c_str()); @@ -817,20 +817,20 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) { "instance void [mscorlib]System.ArgIterator::.ctor" "(valuetype [mscorlib]System.RuntimeArgumentHandle)"); // Save as pointer type "void*" - printValueLoad(Inst->getOperand(1)); + printValueLoad(Inst->getArgOperand(0)); printSimpleInstruction("ldloca",Name.c_str()); printIndirectSave(PointerType::getUnqual( IntegerType::get(Inst->getContext(), 8))); break; case Intrinsic::vaend: // Close argument list handle. - printIndirectLoad(Inst->getOperand(1)); + printIndirectLoad(Inst->getArgOperand(0)); printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()"); break; case Intrinsic::vacopy: // Copy "ArgIterator" valuetype. - printIndirectLoad(Inst->getOperand(1)); - printIndirectLoad(Inst->getOperand(2)); + printIndirectLoad(Inst->getArgOperand(0)); + printIndirectLoad(Inst->getArgOperand(1)); printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator"); break; default: @@ -845,10 +845,11 @@ void MSILWriter::printCallInstruction(const Instruction* Inst) { // Handle intrinsic function. printIntrinsicCall(cast<IntrinsicInst>(Inst)); } else { + const CallInst *CI = cast<CallInst>(Inst); // Load arguments to stack and call function. 
- for (int I = 1, E = Inst->getNumOperands(); I!=E; ++I) - printValueLoad(Inst->getOperand(I)); - printFunctionCall(Inst->getOperand(0),Inst); + for (int I = 0, E = CI->getNumArgOperands(); I!=E; ++I) + printValueLoad(CI->getArgOperand(I)); + printFunctionCall(CI->getCalledFunction(), Inst); } } @@ -1002,8 +1003,8 @@ void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) { std::string Label = "leave$normal_"+utostr(getUniqID()); Out << ".try {\n"; // Load arguments - for (int I = 3, E = Inst->getNumOperands(); I!=E; ++I) - printValueLoad(Inst->getOperand(I)); + for (int I = 0, E = Inst->getNumArgOperands(); I!=E; ++I) + printValueLoad(Inst->getArgOperand(I)); // Print call instruction printFunctionCall(Inst->getOperand(0),Inst); // Save function result and leave "try" block @@ -1280,7 +1281,7 @@ void MSILWriter::printLocalVariables(const Function& F) { case Intrinsic::vaend: case Intrinsic::vacopy: isVaList = true; - VaList = Inst->getOperand(1); + VaList = Inst->getArgOperand(0); break; default: isVaList = false; diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index 7b328bb12c3d..3395e9fc3437 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -272,7 +272,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, AM.Base.Reg; if (AM.GV) - Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i16, AM.Disp, + Disp = CurDAG->getTargetGlobalAddress(AM.GV, Op->getDebugLoc(), + MVT::i16, AM.Disp, 0/*AM.SymbolFlags*/); else if (AM.CP) Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16, diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 403400e35add..a1703a3e78bf 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -278,6 +278,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -290,7 +291,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CallingConv::Fast: case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); case CallingConv::MSP430_INTR: report_fatal_error("ISRs cannot be called directly"); return SDValue(); @@ -369,7 +370,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, << "\n"; } // Create the frame index object for this incoming parameter... 
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); // Create the SelectionDAG nodes corresponding to a load // from this parameter @@ -387,6 +388,7 @@ SDValue MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location @@ -421,7 +423,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // Guarantee that all emitted copies are stuck together, // avoiding something bad. @@ -447,6 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -471,7 +474,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -529,7 +532,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i16); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16); @@ -642,7 +645,8 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op, int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset(); // Create the TargetGlobalAddress node, folding in the constant offset. - SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); + SDValue Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), + getPointerTy(), Offset); return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(), getPointerTy(), Result); } @@ -888,7 +892,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - true, false); + true); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -1070,7 +1074,10 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, // Update machine-CFG edges by transferring all successors of the current // block to the block containing instructions after shift.
- RemBB->transferSuccessors(BB); + RemBB->splice(RemBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + RemBB->transferSuccessorsAndUpdatePHIs(BB); // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB BB->addSuccessor(LoopBB); @@ -1116,11 +1123,11 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI, // RemBB: // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB] - BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg) + BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg) .addReg(SrcReg).addMBB(BB) .addReg(ShiftReg2).addMBB(LoopBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return RemBB; } @@ -1158,18 +1165,22 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB); - BuildMI(BB, dl, TII.get(MSP430::JCC)) - .addMBB(copy1MBB) - .addImm(MI->getOperand(3).getImm()); F->insert(I, copy0MBB); F->insert(I, copy1MBB); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. - copy1MBB->transferSuccessors(BB); + copy1MBB->splice(copy1MBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + copy1MBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(copy1MBB); + BuildMI(BB, dl, TII.get(MSP430::JCC)) + .addMBB(copy1MBB) + .addImm(MI->getOperand(3).getImm()); + // copy0MBB: // %FalseValue = ... // # fallthrough to copy1MBB @@ -1182,11 +1193,11 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = copy1MBB; - BuildMI(BB, dl, TII.get(MSP430::PHI), + BuildMI(*BB, BB->begin(), dl, TII.get(MSP430::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now.
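// Two ordering details in the rewritten inserters above, sketched here as
// reasoning rather than code from the patch: the JCC branch is now built
// only after the tail of BB has been spliced into copy1MBB, since a branch
// appended earlier would sit after the pseudo and be carried away with the
// spliced tail; and the PHI is created with the position-taking BuildMI
// overload,
//
//   BuildMI(*copy1MBB, copy1MBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
//     .addReg(FalseReg).addMBB(copy0MBB)
//     .addReg(TrueReg).addMBB(thisMBB);
//
// so the PHI lands ahead of the spliced-in instructions (DstReg, FalseReg,
// and TrueReg are placeholder names).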
return BB; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 01c5071622a7..673c5433b96e 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -127,6 +127,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -155,6 +156,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -163,6 +165,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 18226ab0f4ab..df28d07f5d71 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -83,27 +83,20 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, llvm_unreachable("Cannot store this register to stack slot!"); } -bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC == SrcRC) { - unsigned Opc; - if (DestRC == &MSP430::GR16RegClass) { - Opc = MSP430::MOV16rr; - } else if (DestRC == &MSP430::GR8RegClass) { - Opc = MSP430::MOV8rr; - } else { - return false; - } - - BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg); - return true; - } +void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (MSP430::GR16RegClass.contains(DestReg, SrcReg)) + Opc = MSP430::MOV16rr; + else if (MSP430::GR8RegClass.contains(DestReg, SrcReg)) + Opc = MSP430::MOV8rr; + else + llvm_unreachable("Impossible reg-to-reg copy"); - return false; + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool @@ -330,10 +323,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc operand - DebugLoc DL; - + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. 
assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index 842b4cb06e41..ebbda1aeef51 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -49,11 +49,10 @@ public: /// virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; } - bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; bool isMoveInstr(const MachineInstr& MI, unsigned &SrcReg, unsigned &DstReg, @@ -93,7 +92,8 @@ public: unsigned RemoveBranch(MachineBasicBlock &MBB) const; unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; }; diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 6b9a2f2f29f4..8792b2236855 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -25,13 +25,16 @@ class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>; def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>; def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>; def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>; -def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; +def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<0>]>; def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>; -def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, +def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; -def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>; +def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisI8<2>]>; //===----------------------------------------------------------------------===// // MSP430 Specific Node Definitions. 
@@ -46,7 +49,7 @@ def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>; def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>; def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>; def MSP430callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart, [SDNPHasChain, SDNPOutFlag]>; @@ -55,8 +58,10 @@ def MSP430callseq_end : [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>; def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>; -def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>; -def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>; +def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, + [SDNPHasChain, SDNPInFlag]>; +def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, + [SDNPInFlag]>; def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>; def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>; def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>; @@ -117,14 +122,14 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), } let usesCustomInserter = 1 in { - def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cc), + def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc), "# Select8 PSEUDO", [(set GR8:$dst, - (MSP430selectcc GR8:$src1, GR8:$src2, imm:$cc))]>; - def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cc), + (MSP430selectcc GR8:$src, GR8:$src2, imm:$cc))]>; + def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR16:$src2, i8imm:$cc), "# Select16 PSEUDO", [(set GR16:$dst, - (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>; + (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>; let Defs = [SRW] in { def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), "# Shl8 PSEUDO", @@ -330,60 +335,60 @@ def MOV16mm : I16mm<0x0, //===----------------------------------------------------------------------===// // Arithmetic Instructions -let isTwoAddress = 1 in { +let Constraints = "$src = $dst" in { let Defs = [SRW] in { let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y def ADD8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "add.b\t{$src2, $dst}", - [(set GR8:$dst, (add GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (add GR8:$src, GR8:$src2)), (implicit SRW)]>; def ADD16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "add.w\t{$src2, $dst}", - [(set GR16:$dst, (add GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (add GR16:$src, GR16:$src2)), (implicit SRW)]>; } def ADD8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "add.b\t{$src2, $dst}", - [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (add GR8:$src, (load addr:$src2))), (implicit SRW)]>; def ADD16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "add.w\t{$src2, $dst}", - [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (add GR16:$src, (load addr:$src2))), (implicit SRW)]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { +Constraints = "$base = $base_wb, $src = $dst" in { 
def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "add.b\t{@$base+, $dst}", []>; def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "add.w\t{@$base+, $dst}", []>; } def ADD8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "add.b\t{$src2, $dst}", - [(set GR8:$dst, (add GR8:$src1, imm:$src2)), + [(set GR8:$dst, (add GR8:$src, imm:$src2)), (implicit SRW)]>; def ADD16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "add.w\t{$src2, $dst}", - [(set GR16:$dst, (add GR16:$src1, imm:$src2)), + [(set GR16:$dst, (add GR16:$src, imm:$src2)), (implicit SRW)]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def ADD8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "add.b\t{$src, $dst}", @@ -424,40 +429,40 @@ let Uses = [SRW] in { let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y def ADC8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "addc.b\t{$src2, $dst}", - [(set GR8:$dst, (adde GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (adde GR8:$src, GR8:$src2)), (implicit SRW)]>; def ADC16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "addc.w\t{$src2, $dst}", - [(set GR16:$dst, (adde GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (adde GR16:$src, GR16:$src2)), (implicit SRW)]>; } // isCommutable def ADC8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "addc.b\t{$src2, $dst}", - [(set GR8:$dst, (adde GR8:$src1, imm:$src2)), + [(set GR8:$dst, (adde GR8:$src, imm:$src2)), (implicit SRW)]>; def ADC16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "addc.w\t{$src2, $dst}", - [(set GR16:$dst, (adde GR16:$src1, imm:$src2)), + [(set GR16:$dst, (adde GR16:$src, imm:$src2)), (implicit SRW)]>; def ADC8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "addc.b\t{$src2, $dst}", - [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))), (implicit SRW)]>; def ADC16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "addc.w\t{$src2, $dst}", - [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))), (implicit SRW)]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def ADC8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "addc.b\t{$src, $dst}", @@ -498,52 +503,52 @@ def ADC16mm : I8mm<0x0, let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y def AND8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "and.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (and GR8:$src, GR8:$src2)), (implicit SRW)]>; def AND16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "and.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (and GR16:$src, GR16:$src2)), (implicit SRW)]>; } def AND8ri : I8ri<0x0, 
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "and.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, imm:$src2)), + [(set GR8:$dst, (and GR8:$src, imm:$src2)), (implicit SRW)]>; def AND16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "and.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, imm:$src2)), + [(set GR16:$dst, (and GR16:$src, imm:$src2)), (implicit SRW)]>; def AND8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "and.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (and GR8:$src, (load addr:$src2))), (implicit SRW)]>; def AND16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "and.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (and GR16:$src, (load addr:$src2))), (implicit SRW)]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { +Constraints = "$base = $base_wb, $src = $dst" in { def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "and.b\t{@$base+, $dst}", []>; def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "and.w\t{@$base+, $dst}", []>; } -let isTwoAddress = 0 in { +let Constraints = "" in { def AND8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "and.b\t{$src, $dst}", @@ -582,46 +587,46 @@ def AND16mm : I16mm<0x0, let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y def OR8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>; + [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>; def OR16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>; + [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>; } def OR8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>; + [(set GR8:$dst, (or GR8:$src, imm:$src2))]>; def OR16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>; + [(set GR16:$dst, (or GR16:$src, imm:$src2))]>; def OR8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>; + [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>; def OR16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>; + [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { +Constraints = "$base = $base_wb, $src = $dst" in { def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, 
GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "bis.b\t{@$base+, $dst}", []>; def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "bis.w\t{@$base+, $dst}", []>; } -let isTwoAddress = 0 in { +let Constraints = "" in { def OR8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "bis.b\t{$src, $dst}", @@ -654,24 +659,24 @@ def OR16mm : I16mm<0x0, // bic does not modify condition codes def BIC8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "bic.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, (not GR8:$src2)))]>; + [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>; def BIC16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "bic.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, (not GR16:$src2)))]>; + [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>; def BIC8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "bic.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, (not (i8 (load addr:$src2)))))]>; + [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>; def BIC16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "bic.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, (not (i16 (load addr:$src2)))))]>; + [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def BIC8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "bic.b\t{$src, $dst}", @@ -695,52 +700,52 @@ def BIC16mm : I16mm<0x0, let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y def XOR8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "xor.b\t{$src2, $dst}", - [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (xor GR8:$src, GR8:$src2)), (implicit SRW)]>; def XOR16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "xor.w\t{$src2, $dst}", - [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (xor GR16:$src, GR16:$src2)), (implicit SRW)]>; } def XOR8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "xor.b\t{$src2, $dst}", - [(set GR8:$dst, (xor GR8:$src1, imm:$src2)), + [(set GR8:$dst, (xor GR8:$src, imm:$src2)), (implicit SRW)]>; def XOR16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "xor.w\t{$src2, $dst}", - [(set GR16:$dst, (xor GR16:$src1, imm:$src2)), + [(set GR16:$dst, (xor GR16:$src, imm:$src2)), (implicit SRW)]>; def XOR8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "xor.b\t{$src2, $dst}", - [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))), (implicit SRW)]>; def XOR16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "xor.w\t{$src2, $dst}", - [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))), (implicit SRW)]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { 
+Constraints = "$base = $base_wb, $src = $dst" in { def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "xor.b\t{@$base+, $dst}", []>; def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "xor.w\t{@$base+, $dst}", []>; } -let isTwoAddress = 0 in { +let Constraints = "" in { def XOR8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "xor.b\t{$src, $dst}", @@ -777,51 +782,51 @@ def XOR16mm : I16mm<0x0, def SUB8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "sub.b\t{$src2, $dst}", - [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (sub GR8:$src, GR8:$src2)), (implicit SRW)]>; def SUB16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "sub.w\t{$src2, $dst}", - [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (sub GR16:$src, GR16:$src2)), (implicit SRW)]>; def SUB8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "sub.b\t{$src2, $dst}", - [(set GR8:$dst, (sub GR8:$src1, imm:$src2)), + [(set GR8:$dst, (sub GR8:$src, imm:$src2)), (implicit SRW)]>; def SUB16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "sub.w\t{$src2, $dst}", - [(set GR16:$dst, (sub GR16:$src1, imm:$src2)), + [(set GR16:$dst, (sub GR16:$src, imm:$src2)), (implicit SRW)]>; def SUB8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "sub.b\t{$src2, $dst}", - [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))), (implicit SRW)]>; def SUB16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "sub.w\t{$src2, $dst}", - [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))), (implicit SRW)]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { +Constraints = "$base = $base_wb, $src = $dst" in { def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "sub.b\t{@$base+, $dst}", []>; def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "sub.w\t{@$base+, $dst}", []>; } -let isTwoAddress = 0 in { +let Constraints = "" in { def SUB8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "sub.b\t{$src, $dst}", @@ -860,39 +865,39 @@ def SUB16mm : I16mm<0x0, let Uses = [SRW] in { def SBC8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "subc.b\t{$src2, $dst}", - [(set GR8:$dst, (sube GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (sube GR8:$src, GR8:$src2)), (implicit SRW)]>; def SBC16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "subc.w\t{$src2, $dst}", - [(set GR16:$dst, (sube GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (sube GR16:$src, GR16:$src2)), (implicit SRW)]>; def SBC8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs 
GR8:$dst), (ins GR8:$src, i8imm:$src2), "subc.b\t{$src2, $dst}", - [(set GR8:$dst, (sube GR8:$src1, imm:$src2)), + [(set GR8:$dst, (sube GR8:$src, imm:$src2)), (implicit SRW)]>; def SBC16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "subc.w\t{$src2, $dst}", - [(set GR16:$dst, (sube GR16:$src1, imm:$src2)), + [(set GR16:$dst, (sube GR16:$src, imm:$src2)), (implicit SRW)]>; def SBC8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "subc.b\t{$src2, $dst}", - [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))), (implicit SRW)]>; def SBC16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "subc.w\t{$src2, $dst}", - [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))), (implicit SRW)]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def SBC8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "subc.b\t{$src, $dst}", @@ -985,59 +990,59 @@ def SWPB16r : II16r<0x0, "swpb\t$dst", [(set GR16:$dst, (bswap GR16:$src))]>; -} // isTwoAddress = 1 +} // Constraints = "$src = $dst" // Integer comparisons let Defs = [SRW] in { def CMP8rr : I8rr<0x0, - (outs), (ins GR8:$src1, GR8:$src2), - "cmp.b\t{$src2, $src1}", - [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>; + (outs), (ins GR8:$src, GR8:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>; def CMP16rr : I16rr<0x0, - (outs), (ins GR16:$src1, GR16:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>; + (outs), (ins GR16:$src, GR16:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>; def CMP8ri : I8ri<0x0, - (outs), (ins GR8:$src1, i8imm:$src2), - "cmp.b\t{$src2, $src1}", - [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>; + (outs), (ins GR8:$src, i8imm:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>; def CMP16ri : I16ri<0x0, - (outs), (ins GR16:$src1, i16imm:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>; + (outs), (ins GR16:$src, i16imm:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>; def CMP8mi : I8mi<0x0, - (outs), (ins memsrc:$src1, i8imm:$src2), - "cmp.b\t{$src2, $src1}", - [(MSP430cmp (load addr:$src1), + (outs), (ins memsrc:$src, i8imm:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp (load addr:$src), (i8 imm:$src2)), (implicit SRW)]>; def CMP16mi : I16mi<0x0, - (outs), (ins memsrc:$src1, i16imm:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp (load addr:$src1), + (outs), (ins memsrc:$src, i16imm:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp (load addr:$src), (i16 imm:$src2)), (implicit SRW)]>; def CMP8rm : I8rm<0x0, - (outs), (ins GR8:$src1, memsrc:$src2), - "cmp.b\t{$src2, $src1}", - [(MSP430cmp GR8:$src1, (load addr:$src2)), + (outs), (ins GR8:$src, memsrc:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp GR8:$src, (load addr:$src2)), (implicit SRW)]>; def CMP16rm : I16rm<0x0, - (outs), (ins GR16:$src1, memsrc:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp GR16:$src1, (load addr:$src2)), + (outs), (ins GR16:$src, memsrc:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp GR16:$src, (load addr:$src2)), (implicit SRW)]>; def CMP8mr : I8mr<0x0, - (outs), (ins memsrc:$src1, GR8:$src2), - 
"cmp.b\t{$src2, $src1}", - [(MSP430cmp (load addr:$src1), GR8:$src2), + (outs), (ins memsrc:$src, GR8:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp (load addr:$src), GR8:$src2), (implicit SRW)]>; def CMP16mr : I16mr<0x0, - (outs), (ins memsrc:$src1, GR16:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp (load addr:$src1), GR16:$src2), + (outs), (ins memsrc:$src, GR16:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp (load addr:$src), GR16:$src2), (implicit SRW)]>; @@ -1045,71 +1050,71 @@ def CMP16mr : I16mr<0x0, // Note that the C condition is set differently than when using CMP. let isCommutable = 1 in { def BIT8rr : I8rr<0x0, - (outs), (ins GR8:$src1, GR8:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su GR8:$src1, GR8:$src2), 0), + (outs), (ins GR8:$src, GR8:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0), (implicit SRW)]>; def BIT16rr : I16rr<0x0, - (outs), (ins GR16:$src1, GR16:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su GR16:$src1, GR16:$src2), 0), + (outs), (ins GR16:$src, GR16:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0), (implicit SRW)]>; } def BIT8ri : I8ri<0x0, - (outs), (ins GR8:$src1, i8imm:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su GR8:$src1, imm:$src2), 0), + (outs), (ins GR8:$src, i8imm:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su GR8:$src, imm:$src2), 0), (implicit SRW)]>; def BIT16ri : I16ri<0x0, - (outs), (ins GR16:$src1, i16imm:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su GR16:$src1, imm:$src2), 0), + (outs), (ins GR16:$src, i16imm:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su GR16:$src, imm:$src2), 0), (implicit SRW)]>; def BIT8rm : I8rm<0x0, - (outs), (ins GR8:$src1, memdst:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su GR8:$src1, (load addr:$src2)), 0), + (outs), (ins GR8:$src, memdst:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0), (implicit SRW)]>; def BIT16rm : I16rm<0x0, - (outs), (ins GR16:$src1, memdst:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su GR16:$src1, (load addr:$src2)), 0), + (outs), (ins GR16:$src, memdst:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0), (implicit SRW)]>; def BIT8mr : I8mr<0x0, - (outs), (ins memsrc:$src1, GR8:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su (load addr:$src1), GR8:$src2), 0), + (outs), (ins memsrc:$src, GR8:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0), (implicit SRW)]>; def BIT16mr : I16mr<0x0, - (outs), (ins memsrc:$src1, GR16:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su (load addr:$src1), GR16:$src2), 0), + (outs), (ins memsrc:$src, GR16:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0), (implicit SRW)]>; def BIT8mi : I8mi<0x0, - (outs), (ins memsrc:$src1, i8imm:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su (load addr:$src1), (i8 imm:$src2)), 0), + (outs), (ins memsrc:$src, i8imm:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0), (implicit SRW)]>; def BIT16mi : I16mi<0x0, - (outs), (ins memsrc:$src1, i16imm:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su (load addr:$src1), (i16 imm:$src2)), 0), + (outs), (ins memsrc:$src, i16imm:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0), (implicit SRW)]>; def BIT8mm : I8mm<0x0, - (outs), (ins memsrc:$src1, memsrc:$src2), - 
"bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su (i8 (load addr:$src1)), + (outs), (ins memsrc:$src, memsrc:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su (i8 (load addr:$src)), (load addr:$src2)), 0), (implicit SRW)]>; def BIT16mm : I16mm<0x0, - (outs), (ins memsrc:$src1, memsrc:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su (i16 (load addr:$src1)), + (outs), (ins memsrc:$src, memsrc:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su (i16 (load addr:$src)), (load addr:$src2)), 0), (implicit SRW)]>; @@ -1134,12 +1139,12 @@ def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>; def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>; def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>; -def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)), - (ADD16ri GR16:$src1, tglobaladdr:$src2)>; -def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)), - (ADD16ri GR16:$src1, texternalsym:$src2)>; -def : Pat<(add GR16:$src1, (MSP430Wrapper tblockaddress:$src2)), - (ADD16ri GR16:$src1, tblockaddress:$src2)>; +def : Pat<(add GR16:$src, (MSP430Wrapper tglobaladdr :$src2)), + (ADD16ri GR16:$src, tglobaladdr:$src2)>; +def : Pat<(add GR16:$src, (MSP430Wrapper texternalsym:$src2)), + (ADD16ri GR16:$src, texternalsym:$src2)>; +def : Pat<(add GR16:$src, (MSP430Wrapper tblockaddress:$src2)), + (ADD16ri GR16:$src, tblockaddress:$src2)>; def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst), (MOV16mi addr:$dst, tglobaladdr:$src)>; @@ -1155,45 +1160,45 @@ def : Pat<(MSP430call (i16 texternalsym:$dst)), (CALLi texternalsym:$dst)>; // add and sub always produce carry -def : Pat<(addc GR16:$src1, GR16:$src2), - (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(addc GR16:$src1, (load addr:$src2)), - (ADD16rm GR16:$src1, addr:$src2)>; -def : Pat<(addc GR16:$src1, imm:$src2), - (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(addc GR16:$src, GR16:$src2), + (ADD16rr GR16:$src, GR16:$src2)>; +def : Pat<(addc GR16:$src, (load addr:$src2)), + (ADD16rm GR16:$src, addr:$src2)>; +def : Pat<(addc GR16:$src, imm:$src2), + (ADD16ri GR16:$src, imm:$src2)>; def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst), (ADD16mr addr:$dst, GR16:$src)>; def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst), (ADD16mm addr:$dst, addr:$src)>; -def : Pat<(addc GR8:$src1, GR8:$src2), - (ADD8rr GR8:$src1, GR8:$src2)>; -def : Pat<(addc GR8:$src1, (load addr:$src2)), - (ADD8rm GR8:$src1, addr:$src2)>; -def : Pat<(addc GR8:$src1, imm:$src2), - (ADD8ri GR8:$src1, imm:$src2)>; +def : Pat<(addc GR8:$src, GR8:$src2), + (ADD8rr GR8:$src, GR8:$src2)>; +def : Pat<(addc GR8:$src, (load addr:$src2)), + (ADD8rm GR8:$src, addr:$src2)>; +def : Pat<(addc GR8:$src, imm:$src2), + (ADD8ri GR8:$src, imm:$src2)>; def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst), (ADD8mr addr:$dst, GR8:$src)>; def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), (ADD8mm addr:$dst, addr:$src)>; -def : Pat<(subc GR16:$src1, GR16:$src2), - (SUB16rr GR16:$src1, GR16:$src2)>; -def : Pat<(subc GR16:$src1, (load addr:$src2)), - (SUB16rm GR16:$src1, addr:$src2)>; -def : Pat<(subc GR16:$src1, imm:$src2), - (SUB16ri GR16:$src1, imm:$src2)>; +def : Pat<(subc GR16:$src, GR16:$src2), + (SUB16rr GR16:$src, GR16:$src2)>; +def : Pat<(subc GR16:$src, (load addr:$src2)), + (SUB16rm GR16:$src, addr:$src2)>; +def : Pat<(subc GR16:$src, imm:$src2), + (SUB16ri GR16:$src, imm:$src2)>; def : Pat<(store (subc (load 
addr:$dst), GR16:$src), addr:$dst), (SUB16mr addr:$dst, GR16:$src)>; def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst), (SUB16mm addr:$dst, addr:$src)>; -def : Pat<(subc GR8:$src1, GR8:$src2), - (SUB8rr GR8:$src1, GR8:$src2)>; -def : Pat<(subc GR8:$src1, (load addr:$src2)), - (SUB8rm GR8:$src1, addr:$src2)>; -def : Pat<(subc GR8:$src1, imm:$src2), - (SUB8ri GR8:$src1, imm:$src2)>; +def : Pat<(subc GR8:$src, GR8:$src2), + (SUB8rr GR8:$src, GR8:$src2)>; +def : Pat<(subc GR8:$src, (load addr:$src2)), + (SUB8rm GR8:$src, addr:$src2)>; +def : Pat<(subc GR8:$src, imm:$src2), + (SUB8ri GR8:$src, imm:$src2)>; def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst), (SUB8mr addr:$dst, GR8:$src)>; def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), @@ -1201,6 +1206,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), // peephole patterns def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>; -def : Pat<(MSP430cmp (trunc (and_su GR16:$src1, GR16:$src2)), 0), - (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit), +def : Pat<(MSP430cmp (trunc (and_su GR16:$src, GR16:$src2)), 0), + (BIT8rr (EXTRACT_SUBREG GR16:$src, subreg_8bit), (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 0cae26714bfc..608ca49fcf78 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -71,48 +71,6 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } -const TargetRegisterClass *const * -MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - const Function* F = MF->getFunction(); - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesFP[] = { - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = { - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesIntrFP[] = { - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, 0 - }; - - if (hasFP(*MF)) - return (F->getCallingConv() == CallingConv::MSP430_INTR ? - CalleeSavedRegClassesIntrFP : CalleeSavedRegClassesFP); - else - return (F->getCallingConv() == CallingConv::MSP430_INTR ? - CalleeSavedRegClassesIntr : CalleeSavedRegClasses); -} - BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); @@ -270,8 +228,8 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const { // Create a frame entry for the FPW register that must be saved. 
if (hasFP(MF)) { - int ATTRIBUTE_UNUSED FrameIdx = - MF.getFrameInfo()->CreateFixedObject(2, -4, true, false); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); + (void)FrameIdx; assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && "Slot for FPW register must be last in order to be found!"); } diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index c8684dfdb041..6e58d3116d27 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -36,9 +36,6 @@ public: /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const; diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 4ef017ab9295..2037a9114559 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -180,7 +180,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName, ManglerPrefixTy PrefixTy = Mangler::Default; if (GV->hasPrivateLinkage() || isImplicitlyPrivate) PrefixTy = Mangler::Private; - else if (GV->hasLinkerPrivateLinkage()) + else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage()) PrefixTy = Mangler::LinkerPrivate; // If this global has a name, handle it simply. diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 4d7fe4cc8262..8ae05b75e919 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -133,8 +133,9 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { const MachineFrameInfo *MFI = MF->getFrameInfo(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg()); - if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass) + unsigned Reg = CSI[i].getReg(); + unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg); + if (Mips::CPURegsRegisterClass->contains(Reg)) CPUBitmask |= (1 << RegNum); else FPUBitmask |= (1 << RegNum); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e979c3fe69fc..b6ff2c371d5c 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -284,6 +284,18 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. 
+ BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); // Emit the right instruction according to the type of the operands compared if (isFPCmp) { @@ -296,20 +308,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg()) .addReg(Mips::ZERO).addMBB(sinkMBB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), - e = BB->succ_end(); i != e; ++i) - sinkMBB->addSuccessor(*i); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while(!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); - // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -322,11 +320,12 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII->get(Mips::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } } @@ -490,21 +489,21 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, // %gp_rel relocation if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) { - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GPREL); SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); } // %hi/%lo relocation - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_ABS_HILO); SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA); return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } else { - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0, + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GOT); SDValue ResNode = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), GA, NULL, 0, @@ -768,6 +767,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -787,7 +787,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // the stack (even if less than 4 are used as arguments) if (Subtarget->isABI_O32()) { int VTsize = EVT(MVT::i32).getSizeInBits()/8; - MFI->CreateFixedObject(VTsize, (VTsize*3), true, false); + MFI->CreateFixedObject(VTsize, (VTsize*3), true); CCInfo.AnalyzeCallOperands(Outs, isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32); } else @@ -808,7 +808,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Walk the register/memloc assignments, inserting copies/loads. 
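// Editor's annotation (sketch): a second API change threads through the
// lowering code above -- SelectionDAG::getTargetGlobalAddress now takes the
// node's DebugLoc explicitly. New call shape, exactly as in the %gp_rel hunk:
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GPREL);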
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; CCValAssign &VA = ArgLocs[i]; // Promote the value if needed. @@ -857,7 +857,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // if O32 ABI is used. For EABI the first address is zero. LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset()); int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, - LastArgStackLoc, true, false); + LastArgStackLoc, true); SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); @@ -889,7 +889,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // node so that legalize doesn't hack it. unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(S->getSymbol(), @@ -929,7 +929,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the frame index only once. SPOffset here can be anything // (this will be fixed on processFunctionBeforeFrameFinalized) if (MipsFI->getGPStackOffset() == -1) { - FI = MFI->CreateFixedObject(4, 0, true, false); + FI = MFI->CreateFixedObject(4, 0, true); MipsFI->setGPFI(FI); } MipsFI->setGPStackOffset(LastArgStackLoc); @@ -1098,7 +1098,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // offset on PEI::calculateFrameObjectOffsets. // Arguments are always 32-bit. unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, 0, true, false); + int FI = MFI->CreateFixedObject(ArgSize, 0, true); MipsFI->recordLoadArgsFI(FI, -(ArgSize+ (FirstStackArgLoc + VA.getLocMemOffset()))); @@ -1137,7 +1137,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); - int FI = MFI->CreateFixedObject(4, 0, true, false); + int FI = MFI->CreateFixedObject(4, 0, true); MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0, @@ -1169,6 +1169,7 @@ SDValue MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of @@ -1198,7 +1199,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index f2de489a2643..460747bf5438 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -120,6 +120,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -128,6 +129,7 @@ namespace llvm { LowerReturn(SDValue 
Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual MachineBasicBlock * diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 4005e35b35b1..6c09a3e10785 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -127,61 +127,75 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const BuildMI(MBB, MI, DL, get(Mips::NOP)); } -bool MipsInstrInfo:: -copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { +void MipsInstrInfo:: +copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool DestCPU = Mips::CPURegsRegClass.contains(DestReg); + bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg); + + // CPU-CPU is the most common. + if (DestCPU && SrcCPU) { + BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - if (DestRC != SrcRC) { - - // Copy to/from FCR31 condition register - if ((DestRC == Mips::CPURegsRegisterClass) && - (SrcRC == Mips::CCRRegisterClass)) - BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg).addReg(SrcReg); - else if ((DestRC == Mips::CCRRegisterClass) && - (SrcRC == Mips::CPURegsRegisterClass)) - BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg).addReg(SrcReg); - - // Moves between coprocessors and cpu - else if ((DestRC == Mips::CPURegsRegisterClass) && - (SrcRC == Mips::FGR32RegisterClass)) - BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg).addReg(SrcReg); - else if ((DestRC == Mips::FGR32RegisterClass) && - (SrcRC == Mips::CPURegsRegisterClass)) - BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg); - - // Move from/to Hi/Lo registers - else if ((DestRC == Mips::HILORegisterClass) && - (SrcRC == Mips::CPURegsRegisterClass)) { - unsigned Opc = (DestReg == Mips::HI) ? Mips::MTHI : Mips::MTLO; - BuildMI(MBB, I, DL, get(Opc), DestReg); - } else if ((SrcRC == Mips::HILORegisterClass) && - (DestRC == Mips::CPURegsRegisterClass)) { - unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO; - BuildMI(MBB, I, DL, get(Opc), DestReg); - } else - // Can't copy this register - return false; + // Copy to CPU from other registers. + if (DestCPU) { + if (Mips::CCRRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (Mips::FGR32RegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (SrcReg == Mips::HI) + BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg); + else if (SrcReg == Mips::LO) + BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg); + else + llvm_unreachable("Copy to CPU from invalid register"); + return; + } - return true; + // Copy to other registers from CPU. 
+ if (SrcCPU) { + if (Mips::CCRRegClass.contains(DestReg)) + BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (Mips::FGR32RegClass.contains(DestReg)) + BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (DestReg == Mips::HI) + BuildMI(MBB, I, DL, get(Mips::MTHI)) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (DestReg == Mips::LO) + BuildMI(MBB, I, DL, get(Mips::MTLO)) + .addReg(SrcReg, getKillRegState(KillSrc)); + else + llvm_unreachable("Copy from CPU to invalid register"); + return; } - if (DestRC == Mips::CPURegsRegisterClass) - BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO) - .addReg(SrcReg); - else if (DestRC == Mips::FGR32RegisterClass) - BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg).addReg(SrcReg); - else if (DestRC == Mips::AFGR64RegisterClass) - BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg).addReg(SrcReg); - else if (DestRC == Mips::CCRRegisterClass) - BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg).addReg(SrcReg); - else - // Can't copy this register - return false; + if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - return true; + if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (Mips::CCRRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + llvm_unreachable("Cannot copy registers"); } void MipsInstrInfo:: @@ -247,80 +261,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Register class not handled!"); } -MachineInstr *MipsInstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, int FI) const -{ - if (Ops.size() != 1) return NULL; - - MachineInstr *NewMI = NULL; - - switch (MI->getOpcode()) { - case Mips::ADDu: - if ((MI->getOperand(0).isReg()) && - (MI->getOperand(1).isReg()) && - (MI->getOperand(1).getReg() == Mips::ZERO) && - (MI->getOperand(2).isReg())) { - if (Ops[0] == 0) { // COPY -> STORE - unsigned SrcReg = MI->getOperand(2).getReg(); - bool isKill = MI->getOperand(2).isKill(); - bool isUndef = MI->getOperand(2).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } else { // COPY -> LOAD - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } - } - break; - case Mips::FMOV_S32: - case Mips::FMOV_D32: - if ((MI->getOperand(0).isReg()) && - (MI->getOperand(1).isReg())) { - const TargetRegisterClass - *RC = RI.getRegClass(MI->getOperand(0).getReg()); - unsigned StoreOpc, LoadOpc; - bool IsMips1 = TM.getSubtarget<MipsSubtarget>().isMips1(); - - if (RC == Mips::FGR32RegisterClass) { - LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1; - } else { - assert(RC == Mips::AFGR64RegisterClass); - // Mips1 doesn't have ldc/sdc instructions. 
- if (IsMips1) break; - LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1; - } - - if (Ops[0] == 0) { // COPY -> STORE - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(2).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI) ; - } else { // COPY -> LOAD - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } - } - break; - } - - return NewMI; -} - //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// @@ -520,9 +460,8 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned MipsInstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) && @@ -531,18 +470,18 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (FBB == 0) { // One way branch. if (Cond.empty()) { // Unconditional branch? - BuildMI(&MBB, dl, get(Mips::J)).addMBB(TBB); + BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB); } else { // Conditional branch. unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm()); const TargetInstrDesc &TID = get(Opc); if (TID.getNumOperands() == 3) - BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()) + BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()) .addReg(Cond[2].getReg()) .addMBB(TBB); else - BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()) + BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()) .addMBB(TBB); } @@ -554,12 +493,12 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, const TargetInstrDesc &TID = get(Opc); if (TID.getNumOperands() == 3) - BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg()) + BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg()) .addMBB(TBB); else - BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addMBB(TBB); - BuildMI(&MBB, dl, get(Mips::J)).addMBB(FBB); + BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB); return 2; } @@ -621,12 +560,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP, - Mips::CPURegsRegisterClass, - Mips::CPURegsRegisterClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global base register!"); - Ok = Ok; // Silence warning when assertions are turned off. 
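// Editor's annotation (sketch): the replacement below no longer asks the
// target to pick a move opcode. Call sites emit a target-independent COPY,
// and the new copyPhysReg hook (rewritten above) materializes the real move
// once physical registers are assigned. Minimal caller-side form, mirroring
// the getGlobalBaseReg fix that follows:
unsigned VReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), VReg)
    .addReg(Mips::GP);
// There is no bool result left to assert on: an impossible copy now trips
// llvm_unreachable inside copyPhysReg instead of failing quietly here.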
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalBaseReg).addReg(Mips::GP); RegInfo.addLiveIn(Mips::GP); MipsFI->setGlobalBaseReg(GlobalBaseReg); diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 7919d9a03853..d6f87f9b0ce8 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -204,13 +204,12 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -223,18 +222,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 2b9e94191dba..5337c9fb816a 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -541,7 +541,7 @@ let Predicates = [HasSwap] in { def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>; def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>; -let Predicates = [HasCondMov], isTwoAddress = 1 in { +let Predicates = [HasCondMov], Constraints = "$F = $dst" in { def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>; def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 5e719af871d2..e15f0a58e501 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -116,34 +116,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const return BitMode32CalleeSavedRegs; } -/// Mips Callee Saved Register Classes -const TargetRegisterClass* const* -MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const -{ - static const TargetRegisterClass * const SingleFloatOnlyCalleeSavedRC[] = { - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0 - }; - - static const TargetRegisterClass * const BitMode32CalleeSavedRC[] = { - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - 
&Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0 - }; - - if (Subtarget.isSingleFloat()) - return SingleFloatOnlyCalleeSavedRC; - else - return BitMode32CalleeSavedRC; -} - BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { @@ -279,7 +251,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); for (unsigned i = 0, e = CSI.size(); i != e ; ++i) { - if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass) + unsigned Reg = CSI[i].getReg(); + if (!Mips::CPURegsRegisterClass->contains(Reg)) break; MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); TopCPUSavedRegOff = StackOffset; @@ -311,7 +284,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const // Adjust FPU Callee Saved Registers Area. This Area must be // aligned to the default Stack Alignment requirements. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass) + unsigned Reg = CSI[i].getReg(); + if (Mips::CPURegsRegisterClass->contains(Reg)) continue; MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); TopFPUSavedRegOff = StackOffset; @@ -528,4 +502,3 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const { } #include "MipsGenRegisterInfo.inc" - diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index bc857b85cabe..b500a650f7cc 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -42,9 +42,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction* MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index f479f4626fd7..54a6a28992bf 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -672,7 +672,8 @@ SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, // FIXME there isn't really debug info here DebugLoc dl = G->getDebugLoc(); - SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i8, + SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), N->getDebugLoc(), + MVT::i8, G->getOffset()); SDValue Offset = DAG.getConstant(0, MVT::i8); @@ -1120,6 +1121,7 @@ SDValue PIC16TargetLowering:: LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = Outs.size(); @@ -1136,7 +1138,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, unsigned RetVals = Ins.size(); for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) { // Get the arguments - Arg = Outs[i].Val; + Arg = OutVals[i]; Ops.clear(); Ops.push_back(Chain); @@ -1158,6 +1160,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue PIC16TargetLowering:: LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = 
Outs.size(); std::string Name; @@ -1183,7 +1186,7 @@ LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag); for (unsigned i=0, Offset = 0; i<NumOps; i++) { // Get the argument - Arg = Outs[i].Val; + Arg = OutVals[i]; StoreOffset = (Offset + AddressOffset); // Store the argument on frame @@ -1282,6 +1285,7 @@ SDValue PIC16TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // Number of values to return @@ -1298,7 +1302,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain, SDValue BS = DAG.getConstant(1, MVT::i8); SDValue RetVal; for(unsigned i=0;i<NumRet; ++i) { - RetVal = Outs[i].Val; + RetVal = OutVals[i]; Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal, ES, BS, DAG.getConstant (i, MVT::i8)); @@ -1374,6 +1378,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -1428,7 +1433,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Considering the GlobalAddressNode case here. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, MVT::i8); + Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i8); Name = G->getGlobal()->getName(); } else {// Considering the ExternalSymbol case here ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Callee); @@ -1461,12 +1466,13 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue CallArgs; if (IsDirectCall) { CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag, - Outs, dl, DAG); + Outs, OutVals, dl, DAG); Chain = getChain(CallArgs); OperFlag = getOutFlag(CallArgs); } else { CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo, - DataAddr_Hi, Outs, Ins, dl, DAG); + DataAddr_Hi, Outs, OutVals, Ins, + dl, DAG); Chain = getChain(CallArgs); OperFlag = getOutFlag(CallArgs); } @@ -1791,14 +1797,14 @@ static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) { static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, unsigned &SPCC) { if (isa<ConstantSDNode>(RHS) && - cast<ConstantSDNode>(RHS)->getZExtValue() == 0 && + cast<ConstantSDNode>(RHS)->isNullValue() && CC == ISD::SETNE && (LHS.getOpcode() == PIC16ISD::SELECT_ICC && LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) && isa<ConstantSDNode>(LHS.getOperand(0)) && isa<ConstantSDNode>(LHS.getOperand(1)) && - cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 && - cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) { + cast<ConstantSDNode>(LHS.getOperand(0))->isOne() && + cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) { SDValue CMPCC = LHS.getOperand(3); SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue(); LHS = CMPCC.getOperand(0); @@ -1928,15 +1934,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. 
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -1953,11 +1956,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII.get(PIC16::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII.get(PIC16::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index eea17f898365..0a7506cb497f 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -106,12 +106,14 @@ namespace llvm { SDValue LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG) const; @@ -143,6 +145,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -151,6 +154,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const; diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 793dd9f029f9..e784f746f7f9 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -151,25 +151,20 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, llvm_unreachable("Can't load this register from stack slot"); } -bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - if (DestRC == PIC16::FSR16RegisterClass) { - BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg); - return true; - } - - if (DestRC == PIC16::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(PIC16::copy_w), DestReg).addReg(SrcReg); - return true; - } +void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, 
DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (PIC16::FSR16RegClass.contains(DestReg, SrcReg)) + Opc = PIC16::copy_fsr; + else if (PIC16::GPRRegClass.contains(DestReg, SrcReg)) + Opc = PIC16::copy_w; + else + llvm_unreachable("Impossible reg-to-reg copy"); - // Not yet supported. - return false; + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI, @@ -196,15 +191,15 @@ bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI, unsigned PIC16InstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); if (FBB == 0) { // One way branch. if (Cond.empty()) { // Unconditional branch? - DebugLoc dl; - BuildMI(&MBB, dl, get(PIC16::br_uncond)).addMBB(TBB); + BuildMI(&MBB, DL, get(PIC16::br_uncond)).addMBB(TBB); } return 1; } diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h index 40a4cb4ddb2d..a3a77f11ba16 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.h +++ b/lib/Target/PIC16/PIC16InstrInfo.h @@ -57,12 +57,10 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const; @@ -70,7 +68,8 @@ public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td index 24df251b5088..86d36cb76ee4 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.td +++ b/lib/Target/PIC16/PIC16InstrInfo.td @@ -134,7 +134,7 @@ include "PIC16InstrFormats.td" //===----------------------------------------------------------------------===// // W = W Op F : Load the value from F and do Op to W. -let isTwoAddress = 1, mayLoad = 1 in +let Constraints = "$src = $dst", mayLoad = 1 in class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>: ByteFormat<OpCode, (outs GPR:$dst), (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), @@ -146,7 +146,7 @@ class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>: // F = F Op W : Load the value from F, do op with W and store in F. // This insn class is not marked as TwoAddress because the reg is // being used as a source operand only. (Remember a TwoAddress insn -// needs a copyRegToReg.) +// needs a copy.) 
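// Editor's annotation (sketch): throughout this .td file the deprecated
// isTwoAddress flag becomes an explicit operand constraint, which names the
// tied operands instead of implying "operand 1 is tied to operand 0".
// Hypothetical SUBLW-style definition showing the new spelling (the def name
// is invented for illustration):
let Constraints = "$src = $dst" in
def sublw_sketch : LiteralFormat<0, (outs GPR:$dst),
                                 (ins GPR:$src, i8imm:$literal),
                                 "sublw $literal",
                                 [(set GPR:$dst, (sub (i8 imm:$literal),
                                                      GPR:$src))]>;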
let mayStore = 1 in class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>: ByteFormat<OpCode, (outs), @@ -160,7 +160,7 @@ class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>: )]>; // W = W Op L : Do Op of L with W and place result in W. -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in class BinOpWL<bits<6> opcode, string OpcStr, SDNode OpNode> : LiteralFormat<opcode, (outs GPR:$dst), (ins GPR:$src, i8imm:$literal), @@ -220,7 +220,7 @@ def set_fsrlo: "movwf ${fsr}L", []>; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def set_fsrhi: ByteFormat<0, (outs FSR16:$dst), (ins FSR16:$src, GPR:$val), @@ -234,8 +234,8 @@ def set_pclath: [(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>; //---------------------------- -// copyRegToReg -// copyRegToReg insns. These are dummy. They should always be deleted +// copyPhysReg +// copyPhysReg insns. These are dummy. They should always be deleted // by the optimizer and never be present in the final generated code. // if they are, then we have to write correct macros for these insns. //---------------------------- @@ -362,7 +362,7 @@ def addwfc: BinOpWF<0, "addwfc", adde>; // With Carry. } // W -= [F] ; load from F and sub the value from W. -let isTwoAddress = 1, mayLoad = 1 in +let Constraints = "$src = $dst", mayLoad = 1 in class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>: ByteFormat<OpCode, (outs GPR:$dst), (ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi), @@ -418,7 +418,7 @@ def orlw : BinOpWL<0, "iorlw", or>; // sublw // W = C - W ; sub W from literal. (Without borrow). -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> : LiteralFormat<opcode, (outs GPR:$dst), (ins GPR:$src, i8imm:$literal), @@ -426,7 +426,7 @@ class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> : [(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>; // subwl // W = W - C ; sub literal from W (Without borrow). -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in class SUBWL<bits<6> opcode, string OpcStr, SDNode OpNode> : LiteralFormat<opcode, (outs GPR:$dst), (ins GPR:$src, i8imm:$literal), diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp index ab81ed1bca99..241170b11c2a 100644 --- a/lib/Target/PIC16/PIC16MemSelOpt.cpp +++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp @@ -117,7 +117,7 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { DebugLoc dl = I->getDebugLoc(); BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$"); Changed = true; - PageChanged = 0; + PageChanged = 0; } } } diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp index c282521be324..27f1cf572ae6 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -150,8 +150,8 @@ void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) { // For PIC16, automatic variables of a function are emitted as globals. -// Clone the auto variables of a function and put them in ValueMap, -// this ValueMap will be used while +// Clone the auto variables of a function and put them in VMap, +// this VMap will be used while // Cloning the code of function itself. // void PIC16Cloner::CloneAutos(Function *F) { @@ -160,11 +160,11 @@ void PIC16Cloner::CloneAutos(Function *F) { Module *M = F->getParent(); Module::GlobalListType &Globals = M->getGlobalList(); - // Clear the leftovers in ValueMap by any previous cloning. 
- ValueMap.clear(); + // Clear the leftovers in VMap by any previous cloning. + VMap.clear(); // Find the auto globals for this function and clone them, and put them - in ValueMap. + in VMap. std::string FnName = F->getName().str(); std::string VarName, ClonedVarName; for (Module::global_iterator I = M->global_begin(), E = M->global_end(); @@ -182,8 +182,8 @@ void PIC16Cloner::CloneAutos(Function *F) { // Add these new globals to module's globals list. Globals.push_back(ClonedGV); - // Update ValueMap. - ValueMap[GV] = ClonedGV; + // Update VMap. + VMap[GV] = ClonedGV; } } } @@ -236,10 +236,10 @@ void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) { } // Clone the given function and return it. -// Note: it uses the ValueMap member of the class, which is already populated +// Note: it uses the VMap member of the class, which is already populated // by cloneAutos by the time we reach here. -// FIXME: Should we just pass ValueMap's ref as a parameter here? rather -// than keeping the ValueMap as a member. +// FIXME: Should we just pass VMap by reference as a parameter here, rather +// than keeping it as a member? Function * PIC16Cloner::cloneFunction(Function *OrgF) { Function *ClonedF; @@ -252,11 +252,11 @@ PIC16Cloner::cloneFunction(Function *OrgF) { } // Clone does not exist. - // First clone the autos, and populate ValueMap. + // First clone the autos, and populate VMap. CloneAutos(OrgF); // Now create the clone. - ClonedF = CloneFunction(OrgF, ValueMap); + ClonedF = CloneFunction(OrgF, VMap); // The new function should be for interrupt line. Therefore should have // the name suffixed with IL and section attribute marked with IL. diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h index 24c11527b03c..e8b5aa45cdca 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h @@ -15,7 +15,7 @@ #ifndef PIC16CLONER_H #define PIC16CLONER_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ValueMap.h" using namespace llvm; using std::vector; @@ -72,7 +72,7 @@ namespace llvm { // the corresponding cloned auto variable of the cloned function. // This value map is passed during the function cloning so that all the // uses of auto variables get updated properly. - DenseMap<const Value*, Value*> ValueMap; + ValueMap<const Value*, Value*> VMap; // Map of already cloned functions.
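// Editor's annotation (sketch): the DenseMap -> llvm::ValueMap switch above is
// more than a rename. ValueMap keys are weak handles: an entry follows its key
// Value through replaceAllUsesWith and drops out when the Value is deleted, so
// the cloner's cache cannot be left holding stale Value pointers. Minimal
// usage, with the same key/value types as the hunk (helper name invented):
#include "llvm/ADT/ValueMap.h"
using namespace llvm;
static Value *lookupClone(ValueMap<const Value*, Value*> &VMap,
                          const Value *Orig) {
  // The entry tracks Orig through RAUW and vanishes if Orig is deleted,
  // which a plain DenseMap<const Value*, Value*> would not do.
  ValueMap<const Value*, Value*>::iterator I = VMap.find(Orig);
  return I == VMap.end() ? 0 : I->second;
}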
map<Function *, Function *> ClonedFunctionMap; diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp index 30a1d4a25d3b..dff98d12c2ae 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.cpp +++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp @@ -35,13 +35,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -// PIC16 Callee Saved Reg Classes -const TargetRegisterClass* const* -PIC16RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; - return CalleeSavedRegClasses; -} - BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); return Reserved; diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h index 6a9a038feda9..5536a617d2be 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.h +++ b/lib/Target/PIC16/PIC16RegisterInfo.h @@ -41,10 +41,6 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo { virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF = 0) const; - // PIC16 callee saved register classes - virtual const TargetRegisterClass* const * - getCalleeSavedRegClasses(const MachineFunction *MF) const; - virtual BitVector getReservedRegs(const MachineFunction &MF) const; virtual bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 66dfd4b73794..db11fdeb7c1e 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -78,7 +78,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, isLoad = TID.mayLoad(); isStore = TID.mayStore(); - unsigned TSFlags = TID.TSFlags; + uint64_t TSFlags = TID.TSFlags; isFirst = TSFlags & PPCII::PPC970_First; isSingle = TSFlags & PPCII::PPC970_Single; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 10b516aa133a..d47d989b34c0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1203,11 +1203,11 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); - const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); - SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there isn't really any debug info here DebugLoc dl = GSDN->getDebugLoc(); + const GlobalValue *GV = GSDN->getGlobal(); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GSDN->getOffset()); + SDValue Zero = DAG.getConstant(0, PtrVT); const TargetMachine &TM = DAG.getTarget(); @@ -1631,7 +1631,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), - isImmutable, false); + isImmutable); // Create load nodes to retrieve arguments from the stack. 
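// Editor's annotation (an assumption about the underlying API, inferred only
// from the uniform mechanical edits in this patch): MachineFrameInfo::
// CreateFixedObject loses its trailing bool, so every caller drops a fourth
// argument. The dropped flag's name never appears in this diff, so it is not
// reproduced here.
// old: MFI->CreateFixedObject(Size, Offset, /*Immutable=*/true, false);
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), isImmutable);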
SDValue FIN = DAG.getFrameIndex(FI, PtrVT); @@ -1700,8 +1700,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( FuncInfo->setVarArgsStackOffset( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - CCInfo.getNextStackOffset(), - true, false)); + CCInfo.getNextStackOffset(), true)); FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); @@ -1911,7 +1910,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. - int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true, false); + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { @@ -1936,7 +1935,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // the object. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, @@ -2062,7 +2061,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( if (needsLoad) { int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset + (ArgSize - ObjSize), - isImmutable, false); + isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); @@ -2097,7 +2096,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - Depth, true, false)); + Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs @@ -2137,6 +2136,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, unsigned CC, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, unsigned &nAltivecParamsAtEnd) { // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is @@ -2153,9 +2153,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // 16-byte aligned. nAltivecParamsAtEnd = 0; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; - EVT ArgVT = Arg.getValueType(); + EVT ArgVT = Outs[i].VT; // Varargs Altivec parameters are padded to a 16 byte boundary. if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { @@ -2314,8 +2314,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, - NewRetAddrLoc, - true, false); + NewRetAddrLoc, true); EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, @@ -2328,7 +2327,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, - true, false); + true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, PseudoSourceValue::getFixedStack(NewFPIdx), 0, @@ -2346,7 +2345,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true,false); + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; @@ -2472,7 +2471,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType()); else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) @@ -2705,6 +2705,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -2714,11 +2715,11 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, Ins, + isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); } else { return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, Ins, + isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); } } @@ -2728,6 +2729,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -2737,7 +2739,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && "Unknown calling convention!"); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); unsigned PtrByteSize = 4; MachineFunction &MF = DAG.getMachineFunction(); @@ -2769,7 +2770,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { - EVT ArgVT = Outs[i].Val.getValueType(); + EVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; bool Result; @@ -2838,7 +2839,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, 
i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (Flags.isByVal()) { @@ -2934,6 +2935,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -2961,7 +2963,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // prereserved space for [SP][CR][LR][3 x unused]. unsigned NumBytes = CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, - Outs, + Outs, OutVals, nAltivecParamsAtEnd); // Calculate by how many bytes the stack has to be adjusted in case of tail @@ -3025,7 +3027,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SmallVector<SDValue, 8> MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // PtrOff will be used to store the current argument to the stack if a @@ -3051,7 +3053,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Everything else is passed left-justified. EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, dl, Chain, Arg, NULL, 0, VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3228,8 +3230,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, ArgOffset = ((ArgOffset+15)/16)*16; ArgOffset += 12*16; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; - EVT ArgType = Arg.getValueType(); + SDValue Arg = OutVals[i]; + EVT ArgType = Outs[i].VT; if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { if (++j > NumVRs) { @@ -3297,6 +3299,7 @@ SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { SmallVector<CCValAssign, 16> RVLocs; @@ -3318,7 +3321,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); Flag = Chain.getValue(1); } @@ -3376,8 +3379,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // Find out what the fix offset of the frame pointer save area. int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, - true, false); + RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true); // Save the result. FI->setReturnAddrSaveIndex(RASI); } @@ -3403,8 +3405,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, - true, false); + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. 
FI->setFramePointerSaveIndex(FPSI); } @@ -4518,7 +4519,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : @@ -4583,7 +4587,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = @@ -4716,23 +4723,22 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); - BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -4745,7 +4751,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII->get(PPC::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); } @@ -4831,7 +4838,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... 
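// Editor's annotation (sketch): the OutVals threading visible in the PPC
// calling-convention hunks above is one mechanical change -- ISD::OutputArg no
// longer carries the argument SDValue (formerly Outs[i].Val); values travel in
// a parallel OutVals array while Outs keeps the flags and the VT. A minimal
// consumer, with the helper name invented for illustration:
static void copyArgsToRegs(SelectionDAG &DAG, DebugLoc dl,
                           SDValue &Chain, SDValue &Flag,
                           const SmallVectorImpl<CCValAssign> &ArgLocs,
                           const SmallVectorImpl<ISD::OutputArg> &Outs,
                           const SmallVectorImpl<SDValue> &OutVals) {
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    const CCValAssign &VA = ArgLocs[i];
    if (!VA.isRegLoc())
      continue;
    SDValue Arg = OutVals[i];              // the value (was Outs[i].Val)
    ISD::ArgFlagsTy Flags = Outs[i].Flags; // metadata stays in Outs
    (void)Flags;                           // unused in the reg-only case
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
  }
}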
@@ -4899,7 +4909,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = @@ -5025,7 +5038,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, llvm_unreachable("Unexpected instr type to insert"); } - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -5042,19 +5055,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, default: break; case PPCISD::SHL: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { - if (C->getZExtValue() == 0) // 0 << V -> 0. + if (C->isNullValue()) // 0 << V -> 0. return N->getOperand(0); } break; case PPCISD::SRL: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { - if (C->getZExtValue() == 0) // 0 >>u V -> 0. + if (C->isNullValue()) // 0 >>u V -> 0. return N->getOperand(0); } break; case PPCISD::SRA: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { - if (C->getZExtValue() == 0 || // 0 >>s V -> 0. + if (C->isNullValue() || // 0 >>s V -> 0. C->isAllOnesValue()) // -1 >>s V -> -1. return N->getOperand(0); } @@ -5380,11 +5393,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops -/// vector. If it is invalid, don't add anything to Ops. If hasMemory is true -/// it means one of the asm constraint of the inline asm instruction being -/// processed is 'm'. +/// vector. If it is invalid, don't add anything to Ops. void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter, - bool hasMemory, std::vector<SDValue>&Ops, SelectionDAG &DAG) const { SDValue Result(0,0); @@ -5443,7 +5453,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter, } // Handle standard constraint letters. - TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG); + TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG); } // isLegalAddressingMode - Return true if the addressing mode represented diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 6dcaf1e1a2c7..700816f5a129 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -318,12 +318,9 @@ namespace llvm { unsigned getByValTypeAlignment(const Type *Ty) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops - /// vector. If it is invalid, don't add anything to Ops. If hasMemory is - /// true it means one of the asm constraint of the inline asm instruction - /// being processed is 'm'. + /// vector. If it is invalid, don't add anything to Ops. 
virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; @@ -438,6 +435,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -446,6 +444,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue @@ -465,6 +464,7 @@ namespace llvm { LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -472,6 +472,7 @@ namespace llvm { LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 1b7a7783b23c..1574aa3fb23a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -316,9 +316,8 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -327,50 +326,46 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // One-way branch. if (FBB == 0) { if (Cond.empty()) // Unconditional branch - BuildMI(&MBB, dl, get(PPC::B)).addMBB(TBB); + BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); else // Conditional branch - BuildMI(&MBB, dl, get(PPC::BCC)) + BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); return 1; } // Two-way Conditional Branch. - BuildMI(&MBB, dl, get(PPC::BCC)) + BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - BuildMI(&MBB, dl, get(PPC::B)).addMBB(FBB); + BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); return 2; } -bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC != SrcRC) { - // Not yet supported! 
- return false; - } - - if (DestRC == PPC::GPRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::G8RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::F4RCRegisterClass || - DestRC == PPC::F8RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg); - } else if (DestRC == PPC::CRRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg); - } else if (DestRC == PPC::VRRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::VOR), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::CRBITRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::CROR), DestReg).addReg(SrcReg).addReg(SrcReg); - } else { - // Attempt to copy register that is not GPR or FPR - return false; - } - - return true; +void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR; + else if (PPC::G8RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR8; + else if (PPC::F4RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::FMR; + else if (PPC::CRRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::MCRF; + else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::VOR; + else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::CROR; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + const TargetInstrDesc &TID = get(Opc); + if (TID.getNumOperands() == 3) + BuildMI(MBB, I, DL, TID, DestReg) + .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); + else + BuildMI(MBB, I, DL, TID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } bool @@ -654,121 +649,6 @@ PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, return &*MIB; } -/// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into -/// copy instructions, turning them into load/store instructions. -MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const { - if (Ops.size() != 1) return NULL; - - // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because - // it takes more than one instruction to store it. 
- unsigned Opc = MI->getOpcode(); - unsigned OpNum = Ops[0]; - - MachineInstr *NewMI = NULL; - if ((Opc == PPC::OR && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) { - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } else if ((Opc == PPC::OR8 && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) { - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) { - // The register may be F4RC or F8RC, and that determines the memory op. - unsigned OrigReg = MI->getOperand(OpNum).getReg(); - // We cannot tell the register class from a physreg alone. - if (TargetRegisterInfo::isPhysicalRegister(OrigReg)) - return NULL; - const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg); - const bool is64 = RC == PPC::F8RCRegisterClass; - - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), - get(is64 ? PPC::STFD : PPC::STFS)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), - get(is64 ? PPC::LFD : PPC::LFS)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } - - return NewMI; -} - -bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { - if (Ops.size() != 1) return false; - - // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because - // it takes more than one instruction to store it. 
- unsigned Opc = MI->getOpcode(); - - if ((Opc == PPC::OR && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) - return true; - else if ((Opc == PPC::OR8 && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) - return true; - else if (Opc == PPC::FMR || Opc == PPC::FMRSD) - return true; - - return false; -} - - bool PPCInstrInfo:: ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 7a9e11bd6676..eadb21e21702 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -109,13 +109,12 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -135,23 +134,6 @@ public: const MDNode *MDPtr, DebugLoc DL) const; - /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into - /// copy instructions, turning them into load/store instructions. - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const; - virtual bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 0ff852cf15a1..4d6132a9ec50 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -269,140 +269,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; } -const TargetRegisterClass* const* -PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - // 32-bit Darwin calling convention. 
- static const TargetRegisterClass * const Darwin32_CalleeSavedRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - &PPC::GPRCRegClass, 0 - }; - - // 32-bit SVR4 calling convention. - static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRSAVERCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - 0 - }; - - // 64-bit Darwin calling convention. 
- static const TargetRegisterClass * const Darwin64_CalleeSavedRegClasses[] = { - &PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - &PPC::G8RCRegClass, 0 - }; - - // 64-bit SVR4 calling convention. - static const TargetRegisterClass * const SVR4_64_CalleeSavedRegClasses[] = { - &PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRSAVERCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - 0 - }; - - if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses : - Darwin32_CalleeSavedRegClasses; - - return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegClasses - : SVR4_CalleeSavedRegClasses; -} - // needsFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. @@ -1060,8 +926,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. 
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, - true, false); + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. FI->setFramePointerSaveIndex(FPSI); } @@ -1069,8 +934,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { - MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, - true, false); + MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } // Reserve a slot closest to SP or frame pointer if we have a dynalloc or @@ -1127,9 +991,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = CSI[i].getRegClass(); - - if (RC == PPC::GPRCRegisterClass) { + if (PPC::GPRCRegisterClass->contains(Reg)) { HasGPSaveArea = true; GPRegs.push_back(CSI[i]); @@ -1137,7 +999,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinGPR) { MinGPR = Reg; } - } else if (RC == PPC::G8RCRegisterClass) { + } else if (PPC::G8RCRegisterClass->contains(Reg)) { HasG8SaveArea = true; G8Regs.push_back(CSI[i]); @@ -1145,7 +1007,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinG8R) { MinG8R = Reg; } - } else if (RC == PPC::F8RCRegisterClass) { + } else if (PPC::F8RCRegisterClass->contains(Reg)) { HasFPSaveArea = true; FPRegs.push_back(CSI[i]); @@ -1154,12 +1016,12 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) MinFPR = Reg; } // FIXME SVR4: Disable CR save area for now. - } else if ( RC == PPC::CRBITRCRegisterClass - || RC == PPC::CRRCRegisterClass) { + } else if (PPC::CRBITRCRegisterClass->contains(Reg) + || PPC::CRRCRegisterClass->contains(Reg)) { // HasCRSaveArea = true; - } else if (RC == PPC::VRSAVERCRegisterClass) { + } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) { HasVRSAVESaveArea = true; - } else if (RC == PPC::VRRCRegisterClass) { + } else if (PPC::VRRCRegisterClass->contains(Reg)) { HasVRSaveArea = true; VRegs.push_back(CSI[i]); @@ -1240,9 +1102,10 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // which have the CR/CRBIT register class? // Adjust the frame index of the CR spill slot. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - const TargetRegisterClass *RC = CSI[i].getRegClass(); + unsigned Reg = CSI[i].getReg(); - if (RC == PPC::CRBITRCRegisterClass || RC == PPC::CRRCRegisterClass) { + if (PPC::CRBITRCRegisterClass->contains(Reg) || + PPC::CRRCRegisterClass->contains(Reg)) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -1257,9 +1120,9 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // which have the VRSAVE register class? // Adjust the frame index of the VRSAVE spill slot. 
for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - const TargetRegisterClass *RC = CSI[i].getRegClass(); + unsigned Reg = CSI[i].getReg(); - if (RC == PPC::VRSAVERCRegisterClass) { + if (PPC::VRSAVERCRegisterClass->contains(Reg)) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -1762,4 +1625,3 @@ int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { } #include "PPCGenRegisterInfo.inc" - diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 43cf53546bdb..f026847a540b 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -42,9 +42,6 @@ public: /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; /// targetHandlesStackFrameRounding - Returns true if the target is diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 7fa73edaefee..4d7ee08de1de 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -300,6 +300,14 @@ unsigned long reverse(unsigned v) { return v ^ (t >> 8); } +Neither is this (very standard idiom): + +int f(int n) +{ + return (((n) << 24) | (((n) & 0xff00) << 8) + | (((n) >> 8) & 0xff00) | ((n) >> 24)); +} + //===---------------------------------------------------------------------===// [LOOP RECOGNITION] @@ -898,17 +906,6 @@ The expression should optimize to something like //===---------------------------------------------------------------------===// -From GCC Bug 3756: -int -pn (int n) -{ - return (n >= 0 ? 1 : -1); -} -Should combine to (n >> 31) | 1. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts | llc". - -//===---------------------------------------------------------------------===// - void a(int variable) { if (variable == 4 || variable == 6) @@ -1439,33 +1436,6 @@ This pattern repeats several times, basically doing: //===---------------------------------------------------------------------===// -186.crafty contains this interesting pattern: - -%77 = call i8* @strstr(i8* getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0), - i8* %30) -%phitmp648 = icmp eq i8* %77, getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0) -br i1 %phitmp648, label %bb70, label %bb76 - -bb70: ; preds = %OptionMatch.exit91, %bb69 - %78 = call i32 @strlen(i8* %30) nounwind readonly align 1 ; <i32> [#uses=1] - -This is basically: - cststr = "abcdef"; - if (strstr(cststr, P) == cststr) { - x = strlen(P); - ... - -The strstr call would be significantly cheaper written as: - -cststr = "abcdef"; -if (memcmp(P, str, strlen(P))) - x = strlen(P); - -This is memcmp+strlen instead of strstr. This also makes the strlen fully -redundant. - -//===---------------------------------------------------------------------===// - 186.crafty also contains this code: %1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0)) @@ -1863,3 +1833,91 @@ LLVM prefers comparisons with zero over non-zero in general, but in this case it choses instead to keep the max operation obvious. 
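//===---------------------------------------------------------------------===//

For reference against the byte-swap entry added above: f() takes an int, so
its (n) >> 24 term is an arithmetic shift and the function only matches a
byte swap for non-negative n; the unsigned form is the exact idiom. Written
with the compiler builtin (assuming GCC/Clang's __builtin_bswap32; the
builtin, the name f2, and the assembly below are illustrative, not part of
this entry), the recognition target is:

unsigned f2(unsigned n) {
  return __builtin_bswap32(n);  // what the shift/or idiom should become
}

which on x86-64 lowers to roughly:

f2:
  movl %edi, %eax
  bswapl %eax
  ret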
//===---------------------------------------------------------------------===// + +Take the following testcase on x86-64 (similar testcases exist for all targets +with addc/adde): + +define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, +i64 %c) nounwind { +entry: + %0 = zext i64 %a to i128 ; <i128> [#uses=1] + %1 = zext i64 %b to i128 ; <i128> [#uses=1] + %2 = add i128 %1, %0 ; <i128> [#uses=2] + %3 = zext i64 %c to i128 ; <i128> [#uses=1] + %4 = shl i128 %3, 64 ; <i128> [#uses=1] + %5 = add i128 %4, %2 ; <i128> [#uses=1] + %6 = lshr i128 %5, 64 ; <i128> [#uses=1] + %7 = trunc i128 %6 to i64 ; <i64> [#uses=1] + store i64 %7, i64* %s, align 8 + %8 = trunc i128 %2 to i64 ; <i64> [#uses=1] + store i64 %8, i64* %t, align 8 + ret void +} + +Generated code: + addq %rcx, %rdx + movl $0, %eax + adcq $0, %rax + addq %r8, %rax + movq %rax, (%rdi) + movq %rdx, (%rsi) + ret + +Expected code: + addq %rcx, %rdx + adcq $0, %r8 + movq %r8, (%rdi) + movq %rdx, (%rsi) + ret + +The generated SelectionDAG has an ADD of an ADDE, where both operands of the +ADDE are zero. Replacing one of the operands of the ADDE with the other operand +of the ADD, and replacing the ADD with the ADDE, should give the desired result. + +(That said, we are doing a lot better than gcc on this testcase. :) ) + +//===---------------------------------------------------------------------===// + +Switch lowering generates less than ideal code for the following switch: +define void @a(i32 %x) nounwind { +entry: + switch i32 %x, label %if.end [ + i32 0, label %if.then + i32 1, label %if.then + i32 2, label %if.then + i32 3, label %if.then + i32 5, label %if.then + ] +if.then: + tail call void @foo() nounwind + ret void +if.end: + ret void +} +declare void @foo() + +Generated code on x86-64 (other platforms give similar results): +a: + cmpl $5, %edi + ja .LBB0_2 + movl %edi, %eax + movl $47, %ecx + btq %rax, %rcx + jb .LBB0_3 +.LBB0_2: + ret +.LBB0_3: + jmp foo # TAILCALL + +The movl+movl+btq+jb could be simplified to a cmpl+jne. + +Or, if we wanted to be really clever, we could simplify the whole thing to +something like the following, which eliminates a branch: + xorl $1, %edi + cmpl $4, %edi + ja .LBB0_2 + ret +.LBB0_2: + jmp foo # TAILCALL + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index f47e53acbfc1..4099a628773f 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -38,6 +38,7 @@ SDValue SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. @@ -66,7 +67,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // Guarantee that all emitted copies are stuck together with flags. 
Flag = Chain.getValue(1); @@ -133,7 +134,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { @@ -146,7 +147,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8); FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); - Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, + Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr, NULL, 0, ObjectVT, false, false, 0); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } @@ -169,7 +170,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0, false, false, 0); @@ -192,7 +193,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, false, false, 0); @@ -205,7 +206,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, false, false, 0); @@ -239,7 +240,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32); int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0, @@ -262,6 +263,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -283,7 +285,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Count the size of the outgoing arguments. unsigned ArgsSize = 0; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - switch (Outs[i].Val.getValueType().getSimpleVT().SimpleTy) { + switch (Outs[i].VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown value type!"); case MVT::i1: case MVT::i8: @@ -316,7 +318,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. 
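    // Note: the SDValue that used to travel as Outs[i].Val now arrives
    // through the parallel OutVals array, so Outs carries only the value
    // type and argument flags; the same substitution is applied to every
    // target's LowerCall/LowerReturn in this patch.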
switch (VA.getLocInfo()) { @@ -358,8 +360,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, unsigned ArgOffset = 68; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - SDValue Val = Outs[i].Val; - EVT ObjectVT = Val.getValueType(); + SDValue Val = OutVals[i]; + EVT ObjectVT = Outs[i].VT; SDValue ValToStore(0, 0); unsigned ObjSize; switch (ObjectVT.getSimpleVT().SimpleTy) { @@ -478,7 +480,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); @@ -737,7 +739,7 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, unsigned &SPCC) { if (isa<ConstantSDNode>(RHS) && - cast<ConstantSDNode>(RHS)->getZExtValue() == 0 && + cast<ConstantSDNode>(RHS)->isNullValue() && CC == ISD::SETNE && ((LHS.getOpcode() == SPISD::SELECT_ICC && LHS.getOperand(3).getOpcode() == SPISD::CMPICC) || @@ -745,8 +747,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) && isa<ConstantSDNode>(LHS.getOperand(0)) && isa<ConstantSDNode>(LHS.getOperand(1)) && - cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 && - cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) { + cast<ConstantSDNode>(LHS.getOperand(0))->isOne() && + cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) { SDValue CMPCC = LHS.getOperand(3); SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue(); LHS = CMPCC.getOperand(0); @@ -759,7 +761,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA); SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA); @@ -1007,21 +1009,20 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. 
- while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); // copy0MBB: // %FalseValue = ... @@ -1035,11 +1036,11 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII.get(SP::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, TII.get(SP::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 5ebdcacba57d..db39e083a836 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -86,6 +86,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -94,6 +95,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 8e49ecac4dfb..3a4c80ad076a 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -109,38 +109,29 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI, unsigned SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond)const{ - // FIXME this should probably take a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL)const{ // Can only insert uncond branches so far. assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); - BuildMI(&MBB, dl, get(SP::BA)).addMBB(TBB); + BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB); return 1; } -bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC != SrcRC) { - // Not yet supported! - return false; - } - - if (DestRC == SP::IntRegsRegisterClass) - BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg); - else if (DestRC == SP::FPRegsRegisterClass) - BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg).addReg(SrcReg); - else if (DestRC == SP::DFPRegsRegisterClass) - BuildMI(MBB, I, DL, get(Subtarget.isV9() ? 
SP::FMOVD : SP::FpMOVD),DestReg) - .addReg(SrcReg); +void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (SP::IntRegsRegClass.contains(DestReg, SrcReg)) + BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) + BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) + BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); else - // Can't copy this register - return false; - - return true; + llvm_unreachable("Impossible reg-to-reg copy"); } void SparcInstrInfo:: @@ -183,61 +174,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Can't load this register from stack slot"); } -MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - bool isFloat = false; - MachineInstr *NewMI = NULL; - switch (MI->getOpcode()) { - case SP::ORrr: - if (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == SP::G0&& - MI->getOperand(0).isReg() && MI->getOperand(2).isReg()) { - if (OpNum == 0) // COPY -> STORE - NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::STri)) - .addFrameIndex(FI) - .addImm(0) - .addReg(MI->getOperand(2).getReg()); - else // COPY -> LOAD - NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::LDri), - MI->getOperand(0).getReg()) - .addFrameIndex(FI) - .addImm(0); - } - break; - case SP::FMOVS: - isFloat = true; - // FALLTHROUGH - case SP::FMOVD: - if (OpNum == 0) { // COPY -> STORE - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), - get(isFloat ? SP::STFri : SP::STDFri)) - .addFrameIndex(FI) - .addImm(0) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)); - } else { // COPY -> LOAD - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), - get(isFloat ? 
SP::LDFri : SP::LDDFri)) - .addReg(DstReg, RegState::Define | - getDeadRegState(isDead) | getUndefRegState(isUndef)) - .addFrameIndex(FI) - .addImm(0); - } - break; - } - - return NewMI; -} - unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>(); diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index a00ba3939fde..133471857bad 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -68,14 +68,13 @@ public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -89,18 +88,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, - MachineInstr* LoadMI) const { - return 0; - } - unsigned getGlobalBaseReg(MachineFunction *MF) const; }; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 9489580e900c..ddadd51a93a4 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -665,7 +665,7 @@ let Defs = [FCC] in { //===----------------------------------------------------------------------===// // V9 Conditional Moves. -let Predicates = [HasV9], isTwoAddress = 1 in { +let Predicates = [HasV9], Constraints = "$T = $dst" in { // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual. // FIXME: Add instruction encodings for the JIT some day. def MOVICCrr diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 08373bb83869..427cc7fd4577 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -52,13 +52,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } - -const TargetRegisterClass* const* -SparcRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; - return CalleeSavedRegClasses; -} - bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const { return false; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 24d43e3049cc..9f0cda707b3e 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -32,9 +32,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp index 90be2226b78d..d7ac8f50b69e 100644 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -124,7 +124,7 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", 6) == 0) { if (strncmp(Modifier + 7, "even", 4) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32); + Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit); else if (strncmp(Modifier + 7, "odd", 3) == 0) Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32); else diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index bb2952a986dd..ed290ca7ed95 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -670,7 +670,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? - SystemZ::subreg_even32 : SystemZ::subreg_even); + SystemZ::subreg_32bit : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), @@ -754,7 +754,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? - SystemZ::subreg_even32 : SystemZ::subreg_even); + SystemZ::subreg_32bit : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 76f29010d08d..67f739f690dd 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -254,6 +254,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -266,7 +267,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CallingConv::Fast: case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); } } @@ -334,7 +335,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // Create the nodes corresponding to a load from this parameter slot. // Create the frame index object for this incoming parameter... 
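  // Note: the trailing bool dropped from CreateFixedObject() here, and at
  // every other call site in this patch, appears from the call sites to be
  // the old spill-slot flag, leaving Immutable as the surviving argument;
  // the parameter names are an assumption, not taken from the patch.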
int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8, - VA.getLocMemOffset(), true, false); + VA.getLocMemOffset(), true); // Create the SelectionDAG nodes corresponding to a load // from this parameter @@ -372,6 +373,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -402,7 +404,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -464,7 +466,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy()); @@ -550,6 +552,7 @@ SDValue SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location @@ -575,7 +578,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; - SDValue ResValue = Outs[i].Val; + SDValue ResValue = OutVals[i]; assert(VA.isRegLoc() && "Can only return in registers!"); // If this is an 8/16/32-bit value, it is really should be passed promoted @@ -729,14 +732,14 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, SDValue Result; if (!IsPic && !ExtraLoadRequired) { - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); Offset = 0; } else { unsigned char OpFlags = 0; if (ExtraLoadRequired) OpFlags = SystemZII::MO_GOTENT; - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); } Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl, @@ -827,16 +830,20 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB); SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm(); - BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); F->insert(I, copy0MBB); F->insert(I, copy1MBB); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. - copy1MBB->transferSuccessors(BB); + copy1MBB->splice(copy1MBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + copy1MBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. 
BB->addSuccessor(copy0MBB); BB->addSuccessor(copy1MBB); + BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to copy1MBB @@ -849,11 +856,11 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = copy1MBB; - BuildMI(BB, dl, TII.get(SystemZ::PHI), + BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 94bd90617517..51d2df3a3008 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -98,6 +98,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -126,6 +127,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -134,6 +136,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; const SystemZSubtarget &Subtarget; diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 8c5e9058561a..a65828061d3b 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -126,7 +126,7 @@ def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), (implicit PSW)]>; } -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Defs = [PSW] in { let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), @@ -237,7 +237,7 @@ def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), "ddb\t{$dst, $src2}", [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>; -} // isTwoAddress = 1 +} // Constraints = "$src1 = $dst" def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), "sqebr\t{$dst, $src}", diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 043686cfd49d..c03864fe41e4 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -117,59 +117,28 @@ void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } -bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - // Determine if DstRC and SrcRC have a common superclass. 
- const TargetRegisterClass *CommonRC = DestRC; - if (DestRC == SrcRC) - /* Same regclass for source and dest */; - else if (CommonRC->hasSuperClass(SrcRC)) - CommonRC = SrcRC; - else if (!CommonRC->hasSubClass(SrcRC)) - CommonRC = 0; - - if (CommonRC) { - if (CommonRC == &SystemZ::GR64RegClass || - CommonRC == &SystemZ::ADDR64RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::GR32RegClass || - CommonRC == &SystemZ::ADDR32RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::GR64PRegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV64rrP), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::GR128RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV128rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::FP32RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::FMOV32rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::FP64RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::FMOV64rr), DestReg).addReg(SrcReg); - } else { - return false; - } - - return true; - } - - if ((SrcRC == &SystemZ::GR64RegClass && - DestRC == &SystemZ::ADDR64RegClass) || - (DestRC == &SystemZ::GR64RegClass && - SrcRC == &SystemZ::ADDR64RegClass)) { - BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg); - return true; - } else if ((SrcRC == &SystemZ::GR32RegClass && - DestRC == &SystemZ::ADDR32RegClass) || - (DestRC == &SystemZ::GR32RegClass && - SrcRC == &SystemZ::ADDR32RegClass)) { - BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg); - return true; - } - - return false; +void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (SystemZ::GR64RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV64rr; + else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV32rr; + else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV64rrP; + else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV128rr; + else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::FMOV32rr; + else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::FMOV64rr; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool @@ -286,8 +255,7 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass != &SystemZ::FP64RegClass) { + if (!SystemZ::FP64RegClass.contains(Reg)) { unsigned Offset = RegSpillOffsets[Reg]; CalleeFrameSize += 8; if (StartOffset > Offset) { @@ -332,11 +300,10 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Save FPRs for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass == &SystemZ::FP64RegClass) { + if (SystemZ::FP64RegClass.contains(Reg)) { MBB.addLiveIn(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass, - &RI); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), + &SystemZ::FP64RegClass, &RI); } } @@ -361,9 +328,9 @@ 
SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // Restore FP registers for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass == &SystemZ::FP64RegClass) - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI); + if (SystemZ::FP64RegClass.contains(Reg)) + loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + &SystemZ::FP64RegClass, &RI); } // Restore GP registers @@ -523,9 +490,8 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME: this should probably have a DebugLoc operand - DebugLoc DL; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index a753f14c0b82..0559619248a6 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -60,11 +60,10 @@ public: /// virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; } - bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; bool isMoveInstr(const MachineInstr& MI, unsigned &SrcReg, unsigned &DstReg, @@ -102,7 +101,8 @@ public: bool AllowModify) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 22bde4ee7df2..8df07c034385 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -478,7 +478,8 @@ def MOV64rmm : RSYI<0x04EB, "lmg\t{$from, $to, $dst}", []>; -let isReMaterializable = 1, isAsCheapAsAMove = 1, isTwoAddress = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1, + Constraints = "$src = $dst" in { def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src), "lhi\t${dst:subreg_even}, 0", []>; @@ -537,7 +538,7 @@ def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src), (implicit PSW)]>; } -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Defs = [PSW] in { @@ -924,12 +925,12 @@ def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2 "dlg\t{$dst, $src2}", []>; } // mayLoad -} // isTwoAddress = 1 +} // Constraints = "$src1 = $dst" //===----------------------------------------------------------------------===// // Shifts -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def SRL32rri : RSI<0x88, (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), "srl\t{$src, $amt}", @@ -939,7 +940,7 @@ def SRL64rri : RSYI<0xEB0C, "srlg\t{$dst, $src, $amt}", [(set GR64:$dst, (srl 
GR64:$src, riaddr:$amt))]>; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def SHL32rri : RSI<0x89, (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), "sll\t{$src, $amt}", @@ -950,7 +951,7 @@ def SHL64rri : RSYI<0xEB0D, [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>; let Defs = [PSW] in { -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def SRA32rri : RSI<0x8A, (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), "sra\t{$src, $amt}", @@ -1129,13 +1130,13 @@ def : Pat<(mulhs GR32:$src1, GR32:$src2), (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), GR32:$src1, subreg_odd32), GR32:$src2), - subreg_even32)>; + subreg_32bit)>; def : Pat<(mulhu GR32:$src1, GR32:$src2), (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), GR32:$src1, subreg_odd32), GR32:$src2), - subreg_even32)>; + subreg_32bit)>; def : Pat<(mulhu GR64:$src1, GR64:$src2), (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src1, subreg_odd), diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 638fd17c9953..ae96b0b08ff6 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -47,22 +47,6 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -const TargetRegisterClass* const* -SystemZRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, 0 - }; - return CalleeSavedRegClasses; -} - BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); if (hasFP(MF)) diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 42aa5dddb4ab..670025f86e08 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -32,9 +32,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasReservedCallFrame(MachineFunction &MF) const { return true; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index b561744d1561..33be8ddffbed 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -55,7 +55,6 @@ class FPRL<bits<4> num, string n, list<Register> subregs> let Namespace = "SystemZ" in { def subreg_32bit : SubRegIndex; -def subreg_even32 : SubRegIndex; def subreg_odd32 : SubRegIndex; def subreg_even : SubRegIndex; def subreg_odd : SubRegIndex; @@ -99,7 +98,7 @@ def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>; } // Register pairs -let SubRegIndices = [subreg_even32, subreg_odd32] in { +let SubRegIndices = [subreg_32bit, subreg_odd32] in { def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>; def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>; def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>; @@ -111,8 +110,7 @@ def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>; } let SubRegIndices = [subreg_even, subreg_odd], - CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit), - (subreg_odd32 subreg_odd, subreg_32bit)] in { + CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in { def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>; def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>; def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>; @@ -355,7 +353,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, def GR64P : RegisterClass<"SystemZ", [v2i32], 64, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]> { - let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)]; + let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -391,7 +389,7 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64, def GR128 : RegisterClass<"SystemZ", [v2i64], 128, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]> { - let SubRegClasses = [(GR32 subreg_even32, subreg_odd32), + let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32), (GR64 subreg_even, subreg_odd)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 094a57edb419..c099a7eaefe7 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -28,6 +28,10 @@ const TargetRegisterClass * TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const { if (isLookupPtrRegClass()) return TRI->getPointerRegClass(RegClass); + // Instructions like INSERT_SUBREG do not have fixed register classes. + if (RegClass < 0) + return 0; + // Otherwise just look it up normally. 
  return TRI->getRegClass(RegClass);
}
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index b9372d04bb17..dd7b532bbfba 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -101,7 +101,7 @@ static bool IsNullTerminatedString(const Constant *C) {
     ConstantInt *Null =
       dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1));
-    if (Null == 0 || Null->getZExtValue() != 0)
+    if (Null == 0 || !Null->isZero())
       return false; // Not null terminated.
 
     // Verify that the null doesn't occur anywhere else in the string.
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index dcc5f610e6d4..49bfad54136d 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -39,20 +39,20 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
 
 TargetRegisterInfo::~TargetRegisterInfo() {}
 
-/// getPhysicalRegisterRegClass - Returns the Register Class of a physical
-/// register of the given type. If type is EVT::Other, then just return any
-/// register class the register belongs to.
+/// getMinimalPhysRegClass - Returns the Register Class of a physical
+/// register of the given type, picking the smallest register class of
+/// the right type that contains this physreg.
 const TargetRegisterClass *
-TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
   assert(isPhysicalRegister(reg) && "reg must be a physical register");
 
-  // Pick the most super register class of the right type that contains
+  // Pick the smallest register class of the right type that contains
   // this physreg.
   const TargetRegisterClass* BestRC = 0;
   for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
     const TargetRegisterClass* RC = *I;
     if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
-        (!BestRC || BestRC->hasSuperClass(RC)))
+        (!BestRC || BestRC->hasSubClass(RC)))
       BestRC = RC;
   }
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index a58f58e03325..26797ab353b6 100644
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -33,13 +33,11 @@ class X86AsmLexer : public TargetAsmLexer {
   }
 
   const AsmToken &lexDefinite() {
-    if(tentativeIsValid) {
+    if (tentativeIsValid) {
       tentativeIsValid = false;
       return tentativeToken;
     }
-    else {
-      return getLexer()->Lex();
-    }
+    return getLexer()->Lex();
   }
 
   AsmToken LexTokenATT();
@@ -72,38 +70,65 @@ public:
 static unsigned MatchRegisterName(StringRef Name);
 
 AsmToken X86AsmLexer::LexTokenATT() {
-  const AsmToken lexedToken = lexDefinite();
+  AsmToken lexedToken = lexDefinite();
   switch (lexedToken.getKind()) {
   default:
-    return AsmToken(lexedToken);
+    return lexedToken;
   case AsmToken::Error:
     SetError(Lexer->getErrLoc(), Lexer->getErr());
-    return AsmToken(lexedToken);
-  case AsmToken::Percent:
-  {
+    return lexedToken;
+
+  case AsmToken::Percent: {
     const AsmToken &nextToken = lexTentative();
-    if (nextToken.getKind() == AsmToken::Identifier) {
-      unsigned regID = MatchRegisterName(nextToken.getString());
+    if (nextToken.getKind() != AsmToken::Identifier)
+      return lexedToken;
+
-      if (regID) {
-        lexDefinite();
+    if (unsigned regID = MatchRegisterName(nextToken.getString())) {
+      lexDefinite();
+      // FIXME: This is completely wrong when there is a space or other
+      // punctuation between the % and the register name.
+ StringRef regStr(lexedToken.getString().data(), + lexedToken.getString().size() + + nextToken.getString().size()); + + return AsmToken(AsmToken::Register, regStr, + static_cast<int64_t>(regID)); + } + + // Match register name failed. If this is "db[0-7]", match it as an alias + // for dr[0-7]. + if (nextToken.getString().size() == 3 && + nextToken.getString().startswith("db")) { + int RegNo = -1; + switch (nextToken.getString()[2]) { + case '0': RegNo = X86::DR0; break; + case '1': RegNo = X86::DR1; break; + case '2': RegNo = X86::DR2; break; + case '3': RegNo = X86::DR3; break; + case '4': RegNo = X86::DR4; break; + case '5': RegNo = X86::DR5; break; + case '6': RegNo = X86::DR6; break; + case '7': RegNo = X86::DR7; break; + } + + if (RegNo != -1) { + lexDefinite(); + + // FIXME: This is completely wrong when there is a space or other + // punctuation between the % and the register name. StringRef regStr(lexedToken.getString().data(), lexedToken.getString().size() + nextToken.getString().size()); - - return AsmToken(AsmToken::Register, - regStr, - static_cast<int64_t>(regID)); - } - else { - return AsmToken(lexedToken); + return AsmToken(AsmToken::Register, regStr, + static_cast<int64_t>(RegNo)); } } - else { - return AsmToken(lexedToken); - } + + + return lexedToken; } } } @@ -113,26 +138,22 @@ AsmToken X86AsmLexer::LexTokenIntel() { switch(lexedToken.getKind()) { default: - return AsmToken(lexedToken); + return lexedToken; case AsmToken::Error: SetError(Lexer->getErrLoc(), Lexer->getErr()); - return AsmToken(lexedToken); - case AsmToken::Identifier: - { + return lexedToken; + case AsmToken::Identifier: { std::string upperCase = lexedToken.getString().str(); std::string lowerCase = LowercaseString(upperCase); StringRef lowerRef(lowerCase); unsigned regID = MatchRegisterName(lowerRef); - if (regID) { + if (regID) return AsmToken(AsmToken::Register, lexedToken.getString(), static_cast<int64_t>(regID)); - } - else { - return AsmToken(lexedToken); - } + return lexedToken; } } } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 40a6a7b56169..a856e9cc7a4e 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -412,6 +412,28 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, return false; } + // If this is "db[0-7]", match it as an alias + // for dr[0-7]. + if (RegNo == 0 && Tok.getString().size() == 3 && + Tok.getString().startswith("db")) { + switch (Tok.getString()[2]) { + case '0': RegNo = X86::DR0; break; + case '1': RegNo = X86::DR1; break; + case '2': RegNo = X86::DR2; break; + case '3': RegNo = X86::DR3; break; + case '4': RegNo = X86::DR4; break; + case '5': RegNo = X86::DR5; break; + case '6': RegNo = X86::DR6; break; + case '7': RegNo = X86::DR7; break; + } + + if (RegNo != 0) { + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat it. + return false; + } + } + if (RegNo == 0) return Error(Tok.getLoc(), "invalid register name"); @@ -597,6 +619,16 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode"); } + // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and + // the form jrcxz is not allowed in 32-bit mode. + if (Is64Bit) { + if (Name == "jcxz") + return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode"); + } else { + if (Name == "jrcxz") + return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode"); + } + // FIXME: Hack to recognize "sal..." and "rep..." for now. 
We need a way to // represent alternative syntaxes in the .td file, without requiring // instruction duplication. @@ -617,6 +649,23 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, .Case("setnz", "setne") .Case("jz", "je") .Case("jnz", "jne") + .Case("jc", "jb") + // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode + // jecxz requires an AdSize prefix but jecxz does not have a prefix in + // 32-bit mode. + .Case("jecxz", "jcxz") + .Case("jrcxz", "jcxz") + .Case("jna", "jbe") + .Case("jnae", "jb") + .Case("jnb", "jae") + .Case("jnbe", "ja") + .Case("jnc", "jae") + .Case("jng", "jle") + .Case("jnge", "jl") + .Case("jnl", "jge") + .Case("jnle", "jg") + .Case("jpe", "jp") + .Case("jpo", "jnp") .Case("cmovcl", "cmovbl") .Case("cmovcl", "cmovbl") .Case("cmovnal", "cmovbel") @@ -631,36 +680,64 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, .Case("cmovnlel", "cmovgl") .Case("cmovnzl", "cmovnel") .Case("cmovzl", "cmovel") + .Case("fwait", "wait") + .Case("movzx", "movzb") .Default(Name); // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}. const MCExpr *ExtraImmOp = 0; - if (PatchedName.startswith("cmp") && + if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) && (PatchedName.endswith("ss") || PatchedName.endswith("sd") || PatchedName.endswith("ps") || PatchedName.endswith("pd"))) { + bool IsVCMP = PatchedName.startswith("vcmp"); + unsigned SSECCIdx = IsVCMP ? 4 : 3; unsigned SSEComparisonCode = StringSwitch<unsigned>( - PatchedName.slice(3, PatchedName.size() - 2)) - .Case("eq", 0) - .Case("lt", 1) - .Case("le", 2) - .Case("unord", 3) - .Case("neq", 4) - .Case("nlt", 5) - .Case("nle", 6) - .Case("ord", 7) + PatchedName.slice(SSECCIdx, PatchedName.size() - 2)) + .Case("eq", 0) + .Case("lt", 1) + .Case("le", 2) + .Case("unord", 3) + .Case("neq", 4) + .Case("nlt", 5) + .Case("nle", 6) + .Case("ord", 7) + .Case("eq_uq", 8) + .Case("nge", 9) + .Case("ngt", 0x0A) + .Case("false", 0x0B) + .Case("neq_oq", 0x0C) + .Case("ge", 0x0D) + .Case("gt", 0x0E) + .Case("true", 0x0F) + .Case("eq_os", 0x10) + .Case("lt_oq", 0x11) + .Case("le_oq", 0x12) + .Case("unord_s", 0x13) + .Case("neq_us", 0x14) + .Case("nlt_uq", 0x15) + .Case("nle_uq", 0x16) + .Case("ord_s", 0x17) + .Case("eq_us", 0x18) + .Case("nge_uq", 0x19) + .Case("ngt_uq", 0x1A) + .Case("false_os", 0x1B) + .Case("neq_os", 0x1C) + .Case("ge_oq", 0x1D) + .Case("gt_oq", 0x1E) + .Case("true_us", 0x1F) .Default(~0U); if (SSEComparisonCode != ~0U) { ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode, getParser().getContext()); if (PatchedName.endswith("ss")) { - PatchedName = "cmpss"; + PatchedName = IsVCMP ? "vcmpss" : "cmpss"; } else if (PatchedName.endswith("sd")) { - PatchedName = "cmpsd"; + PatchedName = IsVCMP ? "vcmpsd" : "cmpsd"; } else if (PatchedName.endswith("ps")) { - PatchedName = "cmpps"; + PatchedName = IsVCMP ? "vcmpps" : "cmpps"; } else { assert(PatchedName.endswith("pd") && "Unexpected mnemonic!"); - PatchedName = "cmppd"; + PatchedName = IsVCMP ? 
"vcmppd" : "cmppd"; } } } diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 0b64cb4f2828..f2cdb5ba55eb 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -85,11 +85,18 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } -void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { +void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, + raw_ostream &O) { const MCOperand &BaseReg = MI->getOperand(Op); const MCOperand &IndexReg = MI->getOperand(Op+2); const MCOperand &DispSpec = MI->getOperand(Op+3); + const MCOperand &SegReg = MI->getOperand(Op+4); + + // If this has a segment register, print it. + if (SegReg.getReg()) { + printOperand(MI, Op+4, O); + O << ':'; + } if (DispSpec.isImm()) { int64_t DispVal = DispSpec.getImm(); @@ -115,13 +122,3 @@ void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, O << ')'; } } - -void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { - // If this has a segment register, print it. - if (MI->getOperand(Op+4).getReg()) { - printOperand(MI, Op+4, O); - O << ':'; - } - printLeaMemReference(MI, Op, O); -} diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 8d5d50832d49..3be4bae5bec2 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -34,7 +34,6 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); - void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS); void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS); @@ -69,14 +68,8 @@ public: void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } - void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printLeaMemReference(MI, OpNo, O); - } - void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printLeaMemReference(MI, OpNo, O); - } - void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printLeaMemReference(MI, OpNo, O); + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); } }; diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 183213d324b3..73bc603f18f1 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -200,6 +200,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, case X86II::MO_GOT: O << "@GOT"; break; case X86II::MO_GOTOFF: O << "@GOTOFF"; break; case X86II::MO_PLT: O << "@PLT"; break; + case X86II::MO_TLVP: O << "@TLVP"; break; + case X86II::MO_TLVP_PIC_BASE: + O << "@TLVP" << '-'; + PrintPICBaseSymbol(O); + break; } } @@ -383,6 +388,8 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) { printSymbolOperand(MO, O); + if (Subtarget->isPICStyleRIPRel()) + O << "(%rip)"; return false; } if (MO.isReg()) { diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index 7e0a9bb891fa..a632047f6592 
100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -81,12 +81,19 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } -void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { +void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, + raw_ostream &O) { const MCOperand &BaseReg = MI->getOperand(Op); unsigned ScaleVal = MI->getOperand(Op+1).getImm(); const MCOperand &IndexReg = MI->getOperand(Op+2); const MCOperand &DispSpec = MI->getOperand(Op+3); + const MCOperand &SegReg = MI->getOperand(Op+4); + + // If this has a segment register, print it. + if (SegReg.getReg()) { + printOperand(MI, Op+4, O); + O << ':'; + } O << '['; @@ -104,7 +111,7 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, NeedPlus = true; } - + if (!DispSpec.isImm()) { if (NeedPlus) O << " + "; assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); @@ -126,13 +133,3 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, O << ']'; } - -void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { - // If this has a segment register, print it. - if (MI->getOperand(Op+4).getReg()) { - printOperand(MI, Op+4, O); - O << ':'; - } - printLeaMemReference(MI, Op, O); -} diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index a0beeb202e72..4d680744dd60 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -36,7 +36,6 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); - void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O); void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O); @@ -81,17 +80,9 @@ public: O << "XMMWORD PTR "; printMemReference(MI, OpNo, O); } - void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "DWORD PTR "; - printLeaMemReference(MI, OpNo, O); - } - void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "QWORD PTR "; - printLeaMemReference(MI, OpNo, O); - } - void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "QWORD PTR "; - printLeaMemReference(MI, OpNo, O); + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo, O); } }; diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 4edeca979872..09f150bb7946 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -152,6 +152,17 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DARWIN_STUB: break; + case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; + case X86II::MO_TLVP_PIC_BASE: + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); + // Subtract the pic base. 
+ Expr + = MCBinaryExpr::CreateSub(Expr, + MCSymbolRefExpr::Create(GetPICBaseSymbol(), + Ctx), + Ctx); + + break; case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; @@ -266,10 +277,21 @@ static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) { return; // Check whether this is an absolute address. - if (Inst.getOperand(AddrBase + 0).getReg() != 0 || - Inst.getOperand(AddrBase + 2).getReg() != 0 || - Inst.getOperand(AddrBase + 4).getReg() != 0 || - Inst.getOperand(AddrBase + 1).getImm() != 1) + // FIXME: We know TLVP symbol refs aren't, but there should be a better way + // to do this here. + bool Absolute = true; + if (Inst.getOperand(AddrOp).isExpr()) { + const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr(); + if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE)) + if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP) + Absolute = false; + } + + if (Absolute && + (Inst.getOperand(AddrBase + 0).getReg() != 0 || + Inst.getOperand(AddrBase + 2).getReg() != 0 || + Inst.getOperand(AddrBase + 4).getReg() != 0 || + Inst.getOperand(AddrBase + 1).getImm() != 1)) return; // If so, rewrite the instruction. @@ -327,6 +349,15 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { switch (OutMI.getOpcode()) { case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. lower_lea64_32mem(&OutMI, 1); + // FALL THROUGH. + case X86::LEA64r: + case X86::LEA16r: + case X86::LEA32r: + // LEA should have a segment register, but it must be empty. + assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands && + "Unexpected # of LEA operands"); + assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 && + "LEA has segment specified!"); break; case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; @@ -364,10 +395,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; - // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have + // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have // register inputs modeled as normal uses instead of implicit uses. As such, // truncate off all but the first operand (the callee). FIXME: Change isel. - case X86::TAILJMPr: case X86::TAILJMPr64: case X86::CALL64r: case X86::CALL64pcrel32: { @@ -380,11 +410,20 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions. + case X86::TAILJMPr: case X86::TAILJMPd: case X86::TAILJMPd64: { + unsigned Opcode; + switch (OutMI.getOpcode()) { + default: assert(0 && "Invalid opcode"); + case X86::TAILJMPr: Opcode = X86::JMP32r; break; + case X86::TAILJMPd: + case X86::TAILJMPd64: Opcode = X86::JMP_1; break; + } + MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); - OutMI.setOpcode(X86::TAILJMP_1); + OutMI.setOpcode(Opcode); OutMI.addOperand(Saved); break; } @@ -483,8 +522,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, O << V.getName(); O << " <- "; // Frame address. Currently handles register +- offset only. 
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); + O << '['; + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) + printOperand(MI, 0, O); + else + O << "undef"; + O << '+'; printOperand(MI, 3, O); O << ']'; O << "+"; printOperand(MI, NOps-2, O); @@ -495,8 +538,9 @@ X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { MachineLocation Location; assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); + + if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); return Location; } @@ -513,6 +557,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; + case X86::TAILJMPr: + case X86::TAILJMPd: + case X86::TAILJMPd64: + // Lower these as normal, but add some comments. + OutStreamer.AddComment("TAILCALL"); + break; + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which @@ -578,7 +629,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - OutStreamer.EmitInstruction(TmpInst); } diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index 9f91060179e6..97589c00515b 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -4,8 +4,8 @@ add_llvm_library(LLVMX86Disassembler X86Disassembler.cpp X86DisassemblerDecoder.c ) -# workaround for hanging compilation on MSVC9 -if( MSVC_VERSION EQUAL 1500 ) +# workaround for hanging compilation on MSVC9 and 10 +if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) set_property( SOURCE X86Disassembler.cpp PROPERTY COMPILE_FLAGS "/Od" diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 8a5a6308704a..09f1584ce4d9 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -252,13 +252,8 @@ static bool translateRMRegister(MCInst &mcInst, /// @param mcInst - The MCInst to append to. /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. -/// @param sr - Whether or not to emit the segment register. The -/// LEA instruction does not expect a segment-register -/// operand. /// @return - 0 on success; nonzero otherwise -static bool translateRMMemory(MCInst &mcInst, - InternalInstruction &insn, - bool sr) { +static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { // Addresses in an MCInst are represented as five operands: // 1. 
basereg (register) The R/M base, or (if there is a SIB) the // SIB base @@ -385,10 +380,7 @@ static bool translateRMMemory(MCInst &mcInst, mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); mcInst.addOperand(displacement); - - if (sr) - mcInst.addOperand(segmentReg); - + mcInst.addOperand(segmentReg); return false; } @@ -439,9 +431,8 @@ static bool translateRM(MCInst &mcInst, case TYPE_M1616: case TYPE_M1632: case TYPE_M1664: - return translateRMMemory(mcInst, insn, true); case TYPE_LEA: - return translateRMMemory(mcInst, insn, false); + return translateRMMemory(mcInst, insn); } } diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index e5f84e8b169e..b6aba93f3738 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -36,62 +36,6 @@ The pattern isel got this one right. //===---------------------------------------------------------------------===// -SSE doesn't have [mem] op= reg instructions. If we have an SSE instruction -like this: - - X += y - -and the register allocator decides to spill X, it is cheaper to emit this as: - -Y += [xslot] -store Y -> [xslot] - -than as: - -tmp = [xslot] -tmp += y -store tmp -> [xslot] - -..and this uses one fewer register (so this should be done at load folding -time, not at spiller time). *Note* however that this can only be done -if Y is dead. Here's a testcase: - -@.str_3 = external global [15 x i8] -declare void @printf(i32, ...) -define void @main() { -build_tree.exit: - br label %no_exit.i7 - -no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit - %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], - [ %tmp.34.i18, %no_exit.i7 ] - %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], - [ %tmp.28.i16, %no_exit.i7 ] - %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00 - %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00 - br i1 false, label %Compute_Tree.exit23, label %no_exit.i7 - -Compute_Tree.exit23: ; preds = %no_exit.i7 - tail call void (i32, ...)* @printf( i32 0 ) - store double %tmp.34.i18, double* null - ret void -} - -We currently emit: - -.BBmain_1: - xorpd %XMM1, %XMM1 - addsd %XMM0, %XMM1 -*** movsd %XMM2, QWORD PTR [%ESP + 8] -*** addsd %XMM2, %XMM1 -*** movsd QWORD PTR [%ESP + 8], %XMM2 - jmp .BBmain_1 # no_exit.i7 - -This is a bugpoint reduced testcase, which is why the testcase doesn't make -much sense (e.g. its an infinite loop). :) - -//===---------------------------------------------------------------------===// - SSE should implement 'select_cc' using 'emulated conditional moves' that use pcmp/pand/pandn/por to do a selection instead of a conditional branch: @@ -122,12 +66,6 @@ LBB_X_2: //===---------------------------------------------------------------------===// -It's not clear whether we should use pxor or xorps / xorpd to clear XMM -registers. The choice may depend on subtarget information. We should do some -more experiments on different x86 machines. - -//===---------------------------------------------------------------------===// - Lower memcpy / memset to a series of SSE 128 bit move instructions when it's feasible. @@ -151,45 +89,6 @@ Perhaps use pxor / xorp* to clear a XMM register first? //===---------------------------------------------------------------------===// -How to decide when to use the "floating point version" of logical ops? 
Here are -some code fragments: - - movaps LCPI5_5, %xmm2 - divps %xmm1, %xmm2 - mulps %xmm2, %xmm3 - mulps 8656(%ecx), %xmm3 - addps 8672(%ecx), %xmm3 - andps LCPI5_6, %xmm2 - andps LCPI5_1, %xmm3 - por %xmm2, %xmm3 - movdqa %xmm3, (%edi) - - movaps LCPI5_5, %xmm1 - divps %xmm0, %xmm1 - mulps %xmm1, %xmm3 - mulps 8656(%ecx), %xmm3 - addps 8672(%ecx), %xmm3 - andps LCPI5_6, %xmm1 - andps LCPI5_1, %xmm3 - orps %xmm1, %xmm3 - movaps %xmm3, 112(%esp) - movaps %xmm3, (%ebx) - -Due to some minor source change, the later case ended up using orps and movaps -instead of por and movdqa. Does it matter? - -//===---------------------------------------------------------------------===// - -X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible -to choose between movaps, movapd, and movdqa based on types of source and -destination? - -How about andps, andpd, and pand? Do we really care about the type of the packed -elements? If not, why not always use the "ps" variants which are likely to be -shorter. - -//===---------------------------------------------------------------------===// - External test Nurbs exposed some problems. Look for __ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc emits: @@ -278,41 +177,6 @@ It also exposes some other problems. See MOV32ri -3 and the spills. //===---------------------------------------------------------------------===// -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25500 - -LLVM is producing bad code. - -LBB_main_4: # cond_true44 - addps %xmm1, %xmm2 - subps %xmm3, %xmm2 - movaps (%ecx), %xmm4 - movaps %xmm2, %xmm1 - addps %xmm4, %xmm1 - addl $16, %ecx - incl %edx - cmpl $262144, %edx - movaps %xmm3, %xmm2 - movaps %xmm4, %xmm3 - jne LBB_main_4 # cond_true44 - -There are two problems. 1) No need to two loop induction variables. We can -compare against 262144 * 16. 2) Known register coalescer issue. We should -be able eliminate one of the movaps: - - addps %xmm2, %xmm1 <=== Commute! - subps %xmm3, %xmm1 - movaps (%ecx), %xmm4 - movaps %xmm1, %xmm1 <=== Eliminate! - addps %xmm4, %xmm1 - addl $16, %ecx - incl %edx - cmpl $262144, %edx - movaps %xmm3, %xmm2 - movaps %xmm4, %xmm3 - jne LBB_main_4 # cond_true44 - -//===---------------------------------------------------------------------===// - Consider: __m128 test(float a) { @@ -382,22 +246,6 @@ elements are fixed zeros. //===---------------------------------------------------------------------===// -__m128d test1( __m128d A, __m128d B) { - return _mm_shuffle_pd(A, B, 0x3); -} - -compiles to - -shufpd $3, %xmm1, %xmm0 - -Perhaps it's better to use unpckhpd instead? - -unpckhpd %xmm1, %xmm0 - -Don't know if unpckhpd is faster. But it is shorter. - -//===---------------------------------------------------------------------===// - This code generates ugly code, probably due to costs being off or something: define void @test(float* %P, <4 x float>* %P2 ) { @@ -549,6 +397,7 @@ entry: %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly ret i64 %tmp20 } +declare i64 @ccoshf(float %z.0, float %z.1) nounwind readonly This currently compiles to: @@ -987,3 +836,34 @@ This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and doing a shuffle from v[1] to v[0] then a float store. 
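
In intrinsics form, the lowering suggested above is roughly the following
(a sketch of ours, not what the compiler emits today; shuffle immediate 1
moves element 1 down into lane 0):

#include <xmmintrin.h>

// Keep the value in the SSE unit: move v[1] into lane 0, then do a
// scalar float store instead of bouncing through the integer unit.
void store_elt1(float *p, __m128 v) {
  _mm_store_ss(p, _mm_shuffle_ps(v, v, 1));
}
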
//===---------------------------------------------------------------------===// + +On SSE4 machines, we compile this code: + +define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, + <2 x float> *%P) nounwind { + %Z = fadd <2 x float> %Q, %R + + store <2 x float> %Z, <2 x float> *%P + ret <2 x float> %Z +} + +into: + +_test2: ## @test2 +## BB#0: + insertps $0, %xmm2, %xmm2 + insertps $16, %xmm3, %xmm2 + insertps $0, %xmm0, %xmm3 + insertps $16, %xmm1, %xmm3 + addps %xmm2, %xmm3 + movq %xmm3, (%rdi) + movaps %xmm3, %xmm0 + pshufd $1, %xmm3, %xmm1 + ## kill: XMM1<def> XMM1<kill> + ret + +The insertps's of $0 are pointless complex copies. + +//===---------------------------------------------------------------------===// + + diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index e8f7c5d6dd22..78c4dc00ee72 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -1,27 +1,5 @@ //===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// -Implement different PIC models? Right now we only support Mac OS X with small -PIC code model. - -//===---------------------------------------------------------------------===// - -For this: - -extern void xx(void); -void bar(void) { - xx(); -} - -gcc compiles to: - -.globl _bar -_bar: - jmp _xx - -We need to do the tailcall optimization as well. - -//===---------------------------------------------------------------------===// - AMD64 Optimization Manual 8.2 has some nice information about optimizing integer multiplication by a constant. How much of it applies to Intel's X86-64 implementation? There are definite trade-offs to consider: latency vs. register @@ -96,123 +74,14 @@ gcc: movq %rax, (%rdx) ret -//===---------------------------------------------------------------------===// - -Vararg function prologue can be further optimized. Currently all XMM registers -are stored into register save area. Most of them can be eliminated since the -upper bound of the number of XMM registers used are passed in %al. gcc produces -something like the following: - - movzbl %al, %edx - leaq 0(,%rdx,4), %rax - leaq 4+L2(%rip), %rdx - leaq 239(%rsp), %rax - jmp *%rdx - movaps %xmm7, -15(%rax) - movaps %xmm6, -31(%rax) - movaps %xmm5, -47(%rax) - movaps %xmm4, -63(%rax) - movaps %xmm3, -79(%rax) - movaps %xmm2, -95(%rax) - movaps %xmm1, -111(%rax) - movaps %xmm0, -127(%rax) -L2: - -It jumps over the movaps that do not need to be stored. Hard to see this being -significant as it added 5 instruciton (including a indirect branch) to avoid -executing 0 to 8 stores in the function prologue. - -Perhaps we can optimize for the common case where no XMM registers are used for -parameter passing. i.e. is %al == 0 jump over all stores. Or in the case of a -leaf function where we can determine that no XMM input parameter is need, avoid -emitting the stores at all. - -//===---------------------------------------------------------------------===// +And the codegen is even worse for the following +(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103): + void fill1(char *s, int a) + { + __builtin_memset(s, a, 15); + } -AMD64 has a complex calling convention for aggregate passing by value: - -1. If the size of an object is larger than two eightbytes, or in C++, is a non- - POD structure or union type, or contains unaligned fields, it has class - MEMORY. -2. Both eightbytes get initialized to class NO_CLASS. -3. Each field of an object is classified recursively so that always two fields - are considered. 
The resulting class is calculated according to the classes - of the fields in the eightbyte: - (a) If both classes are equal, this is the resulting class. - (b) If one of the classes is NO_CLASS, the resulting class is the other - class. - (c) If one of the classes is MEMORY, the result is the MEMORY class. - (d) If one of the classes is INTEGER, the result is the INTEGER. - (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as - class. - (f) Otherwise class SSE is used. -4. Then a post merger cleanup is done: - (a) If one of the classes is MEMORY, the whole argument is passed in memory. - (b) If SSEUP is not preceeded by SSE, it is converted to SSE. - -Currently llvm frontend does not handle this correctly. - -Problem 1: - typedef struct { int i; double d; } QuadWordS; -It is currently passed in two i64 integer registers. However, gcc compiled -callee expects the second element 'd' to be passed in XMM0. - -Problem 2: - typedef struct { int32_t i; float j; double d; } QuadWordS; -The size of the first two fields == i64 so they will be combined and passed in -a integer register RDI. The third field is still passed in XMM0. - -Problem 3: - typedef struct { int64_t i; int8_t j; int64_t d; } S; - void test(S s) -The size of this aggregate is greater than two i64 so it should be passed in -memory. Currently llvm breaks this down and passed it in three integer -registers. - -Problem 4: -Taking problem 3 one step ahead where a function expects a aggregate value -in memory followed by more parameter(s) passed in register(s). - void test(S s, int b) - -LLVM IR does not allow parameter passing by aggregates, therefore it must break -the aggregates value (in problem 3 and 4) into a number of scalar values: - void %test(long %s.i, byte %s.j, long %s.d); - -However, if the backend were to lower this code literally it would pass the 3 -values in integer registers. To force it be passed in memory, the frontend -should change the function signiture to: - void %test(long %undef1, long %undef2, long %undef3, long %undef4, - long %undef5, long %undef6, - long %s.i, byte %s.j, long %s.d); -And the callee would look something like this: - call void %test( undef, undef, undef, undef, undef, undef, - %tmp.s.i, %tmp.s.j, %tmp.s.d ); -The first 6 undef parameters would exhaust the 6 integer registers used for -parameter passing. The following three integer values would then be forced into -memory. - -For problem 4, the parameter 'd' would be moved to the front of the parameter -list so it will be passed in register: - void %test(int %d, - long %undef1, long %undef2, long %undef3, long %undef4, - long %undef5, long %undef6, - long %s.i, byte %s.j, long %s.d); - -//===---------------------------------------------------------------------===// - -Right now the asm printer assumes GlobalAddress are accessed via RIP relative -addressing. Therefore, it is not possible to generate this: - movabsq $__ZTV10polynomialIdE+16, %rax - -That is ok for now since we currently only support small model. So the above -is selected as - leaq __ZTV10polynomialIdE+16(%rip), %rax - -This is probably slightly slower but is much shorter than movabsq. However, if -we were to support medium or larger code models, we need to use the movabs -instruction. We should probably introduce something like AbsoluteAddress to -distinguish it from GlobalAddress so the asm printer and JIT code emitter can -do the right thing. +For this version, we duplicate the computation of the constant to store. 
//===---------------------------------------------------------------------===// @@ -298,3 +167,107 @@ be able to recognize the zero extend. This could also presumably be implemented if we have whole-function selectiondags. //===---------------------------------------------------------------------===// + +Take the following C code +(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640): + +struct u1 +{ + float x; + float y; +}; + +float foo(struct u1 u) +{ + return u.x + u.y; +} + +Optimizes to the following IR: +define float @foo(double %u.0) nounwind readnone { +entry: + %tmp8 = bitcast double %u.0 to i64 ; <i64> [#uses=2] + %tmp6 = trunc i64 %tmp8 to i32 ; <i32> [#uses=1] + %tmp7 = bitcast i32 %tmp6 to float ; <float> [#uses=1] + %tmp2 = lshr i64 %tmp8, 32 ; <i64> [#uses=1] + %tmp3 = trunc i64 %tmp2 to i32 ; <i32> [#uses=1] + %tmp4 = bitcast i32 %tmp3 to float ; <float> [#uses=1] + %0 = fadd float %tmp7, %tmp4 ; <float> [#uses=1] + ret float %0 +} + +And current llvm-gcc/clang output: + movd %xmm0, %rax + movd %eax, %xmm1 + shrq $32, %rax + movd %eax, %xmm0 + addss %xmm1, %xmm0 + ret + +We really shouldn't move the floats to RAX, only to immediately move them +straight back to the XMM registers. + +There really isn't any good way to handle this purely in IR optimizers; it +could possibly be handled by changing the output of the fronted, though. It +would also be feasible to add a x86-specific DAGCombine to optimize the +bitcast+trunc+(lshr+)bitcast combination. + +//===---------------------------------------------------------------------===// + +Take the following code +(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653): +extern unsigned long table[]; +unsigned long foo(unsigned char *p) { + unsigned long tag = *p; + return table[tag >> 4] + table[tag & 0xf]; +} + +Current code generated: + movzbl (%rdi), %eax + movq %rax, %rcx + andq $240, %rcx + shrq %rcx + andq $15, %rax + movq table(,%rax,8), %rax + addq table(%rcx), %rax + ret + +Issues: +1. First movq should be movl; saves a byte. +2. Both andq's should be andl; saves another two bytes. I think this was + implemented at one point, but subsequently regressed. +3. shrq should be shrl; saves another byte. +4. The first andq can be completely eliminated by using a slightly more + expensive addressing mode. + +//===---------------------------------------------------------------------===// + +Consider the following (contrived testcase, but contains common factors): + +#include <stdarg.h> +int test(int x, ...) { + int sum, i; + va_list l; + va_start(l, x); + for (i = 0; i < x; i++) + sum += va_arg(l, int); + va_end(l); + return sum; +} + +Testcase given in C because fixing it will likely involve changing the IR +generated for it. The primary issue with the result is that it doesn't do any +of the optimizations which are possible if we know the address of a va_list +in the current function is never taken: +1. We shouldn't spill the XMM registers because we only call va_arg with "int". +2. It would be nice if we could scalarrepl the va_list. +3. Probably overkill, but it'd be cool if we could peel off the first five +iterations of the loop. + +Other optimizations involving functions which use va_arg on floats which don't +have the address of a va_list taken: +1. Conversely to the above, we shouldn't spill general registers if we only + call va_arg on "double". +2. If we know nothing more than 64 bits wide is read from the XMM registers, + we can change the spilling code to reduce the amount of stack used by half. 
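
Background for item 1: the SysV x86-64 ABI makes the caller report in %al
how many vector registers the variadic part uses, so a prologue can skip
the XMM spills into the register save area whenever that count is zero.
A small caller-side illustration (hypothetical caller):

#include <stdarg.h>

int test(int x, ...);   /* the testcase above */

int caller(void) {
  /* Only integer varargs are passed, so the compiler zeroes %al before
     the call; test()'s prologue could key off that to skip all eight
     XMM stores. */
  return test(3, 1, 2, 3);
}
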
+ +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index d4545a6fcfd3..efc0cd82f23e 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1103,57 +1103,6 @@ be folded into: shl [mem], 1 //===---------------------------------------------------------------------===// -This testcase misses a read/modify/write opportunity (from PR1425): - -void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){ - int i; - for(i=0; i<width; i++) - b1[i] += (1*(b0[i] + b2[i])+0)>>0; -} - -We compile it down to: - -LBB1_2: # bb - movl (%esi,%edi,4), %ebx - addl (%ecx,%edi,4), %ebx - addl (%edx,%edi,4), %ebx - movl %ebx, (%ecx,%edi,4) - incl %edi - cmpl %eax, %edi - jne LBB1_2 # bb - -the inner loop should add to the memory location (%ecx,%edi,4), saving -a mov. Something like: - - movl (%esi,%edi,4), %ebx - addl (%edx,%edi,4), %ebx - addl %ebx, (%ecx,%edi,4) - -Here is another interesting example: - -void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){ - int i; - for(i=0; i<width; i++) - b1[i] -= (1*(b0[i] + b2[i])+0)>>0; -} - -We miss the r/m/w opportunity here by using 2 subs instead of an add+sub[mem]: - -LBB9_2: # bb - movl (%ecx,%edi,4), %ebx - subl (%esi,%edi,4), %ebx - subl (%edx,%edi,4), %ebx - movl %ebx, (%ecx,%edi,4) - incl %edi - cmpl %eax, %edi - jne LBB9_2 # bb - -Additionally, LSR should rewrite the exit condition of these loops to use -a stride-4 IV, would would allow all the scales in the loop to go away. -This would result in smaller code and more efficient microops. - -//===---------------------------------------------------------------------===// - In SSE mode, we turn abs and neg into a load from the constant pool plus a xor or and instruction, for example: @@ -1301,15 +1250,8 @@ FirstOnet: xorl %eax, %eax ret -There are a few possible improvements here: -1. We should be able to eliminate the dead load into %ecx -2. We could change the "movl 8(%esp), %eax" into - "movzwl 10(%esp), %eax"; this lets us change the cmpl - into a testl, which is shorter, and eliminate the shift. - -We could also in theory eliminate the branch by using a conditional -for the address of the load, but that seems unlikely to be worthwhile -in general. +We could change the "movl 8(%esp), %eax" into "movzwl 10(%esp), %eax"; this +lets us change the cmpl into a testl, which is shorter, and eliminate the shift. //===---------------------------------------------------------------------===// @@ -1331,22 +1273,23 @@ bb7: ; preds = %entry to: -_foo: +foo: # @foo +# BB#0: # %entry + movl 4(%esp), %ecx cmpb $0, 16(%esp) - movl 12(%esp), %ecx + je .LBB0_2 +# BB#1: # %bb movl 8(%esp), %eax - movl 4(%esp), %edx - je LBB1_2 # bb7 -LBB1_1: # bb - addl %edx, %eax + addl %ecx, %eax ret -LBB1_2: # bb7 - movl %edx, %eax - subl %ecx, %eax +.LBB0_2: # %bb7 + movl 12(%esp), %edx + movl %ecx, %eax + subl %edx, %eax ret -The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2 -if it commuted the addl in LBB1_1. +There's an obviously unnecessary movl in .LBB0_2, and we could eliminate a +couple more movls by putting 4(%esp) into %eax instead of %ecx. //===---------------------------------------------------------------------===// @@ -1396,8 +1339,7 @@ Also check why xmm7 is not used at all in the function. //===---------------------------------------------------------------------===// -Legalize loses track of the fact that bools are always zero extended when in -memory. 
This causes us to compile abort_gzip (from 164.gzip) from: +Take the following: target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" @@ -1416,16 +1358,15 @@ bb4.i: ; preds = %entry } declare void @exit(i32) noreturn nounwind -into: - -_abort_gzip: +This compiles into: +_abort_gzip: ## @abort_gzip +## BB#0: ## %entry subl $12, %esp movb _in_exit.4870.b, %al - notb %al - testb $1, %al - jne LBB1_2 ## bb4.i -LBB1_1: ## bb.i - ... + cmpb $1, %al + jne LBB0_2 + +We somehow miss folding the movb into the cmpb. //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 22e89a57f63d..677781d3730e 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -35,6 +35,10 @@ class formatted_raw_ostream; FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel); +/// createGlobalBaseRegPass - This pass initializes a global base +/// register for PIC on x86-32. +FunctionPass* createGlobalBaseRegPass(); + /// createX86FloatingPointStackifierPass - This function returns a pass which /// converts floating point register references and pseudo instructions into /// floating point stack references and physical instructions. diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 151087f58ed0..2cf65c11f94a 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -23,13 +23,13 @@ #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; -namespace { static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { default: assert(0 && "invalid fixup kind!"); case X86::reloc_pcrel_1byte: case FK_Data_1: return 0; + case X86::reloc_pcrel_2byte: case FK_Data_2: return 1; case X86::reloc_pcrel_4byte: case X86::reloc_riprel_4byte: @@ -39,6 +39,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) { } } +namespace { class X86AsmBackend : public TargetAsmBackend { public: X86AsmBackend(const Target &T) @@ -60,6 +61,7 @@ public: bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; }; +} // end anonymous namespace static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { @@ -75,7 +77,6 @@ static unsigned getRelaxedOpcode(unsigned Op) { case X86::JG_1: return X86::JG_4; case X86::JLE_1: return X86::JLE_4; case X86::JL_1: return X86::JL_4; - case X86::TAILJMP_1: case X86::JMP_1: return X86::JMP_4; case X86::JNE_1: return X86::JNE_4; case X86::JNO_1: return X86::JNO_4; @@ -180,6 +181,7 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { /* *** */ +namespace { class ELFX86AsmBackend : public X86AsmBackend { public: ELFX86AsmBackend(const Target &T) @@ -281,7 +283,7 @@ public: } }; -} +} // end anonymous namespace TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, const std::string &TT) { diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index a5774e1f241f..a6a1e4e573cf 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -42,7 +42,7 @@ def RetCC_X86Common : CallingConv<[ // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToReg<[MM0]>>, + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>, // Long double types are always returned in ST0 (even with SSE). 
CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> @@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[ // returned in RAX. This disagrees with ABI documentation but is bug // compatible with gcc. CCIfType<[v1i64], CCAssignToReg<[RAX]>>, - CCIfType<[v8i8, v4i16, v2i32, v2f32], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>, CCDelegateTo<RetCC_X86Common> ]>; @@ -155,7 +155,7 @@ def CC_X86_64_C : CallingConv<[ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32, v2f32], + CCIfType<[v8i8, v4i16, v2i32], CCIfSubtarget<"isTargetDarwin()", CCIfSubtarget<"hasSSE2()", CCPromoteToType<v2i64>>>>, @@ -177,7 +177,7 @@ def CC_X86_64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToStack<8, 8>> + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> ]>; // Calling convention used on Win64 @@ -195,7 +195,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>, // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>, // The first 4 integer arguments are passed in integer registers. @@ -254,7 +254,7 @@ def CC_X86_32_Common : CallingConv<[ // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx // registers if the call is not a vararg call. - CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32, v2f32], + CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[MM0, MM1, MM2]>>>, // Integer/Float values get stored in stack slots that are 4 bytes in diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 8f026049cb84..f13669bd741d 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -138,7 +138,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) { // MOVPC32r is basically a call plus a pop instruction. if (Desc.getOpcode() == X86::MOVPC32r) emitInstruction(*I, &II->get(X86::POP32r)); - NumEmitted++; // Keep track of the # of mi's emitted + ++NumEmitted; // Keep track of the # of mi's emitted } } } while (MCE.finishFunction(MF)); @@ -730,9 +730,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, case X86II::MRMDestMem: { MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, - getX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands) + getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands) .getReg())); - CurOp += X86AddrNumOperands + 1; + CurOp += X86::AddrNumOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), X86II::getSizeOfImm(Desc->TSFlags)); @@ -750,13 +750,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, break; case X86II::MRMSrcMem: { - // FIXME: Maybe lea should have its own form? - int AddrOperands; - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - AddrOperands = X86AddrNumOperands - 1; // No segment register - else - AddrOperands = X86AddrNumOperands; + int AddrOperands = X86::AddrNumOperands; intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? 
                       X86II::getSizeOfImm(Desc->TSFlags) : 0;
@@ -810,14 +804,14 @@
   case X86II::MRM2m: case X86II::MRM3m:
   case X86II::MRM4m: case X86II::MRM5m:
   case X86II::MRM6m: case X86II::MRM7m: {
-    intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ?
-      (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ?
+    intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
+      (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
           X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
 
     MCE.emitByte(BaseOpcode);
     emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
                      PCAdj);
-    CurOp += X86AddrNumOperands;
+    CurOp += X86::AddrNumOperands;
 
     if (CurOp == NumOps)
       break;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 1bc5eb75d752..cdde24a156d0 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -23,7 +23,9 @@
 #include "llvm/GlobalVariable.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -52,20 +54,7 @@ class X86FastISel : public FastISel {
   bool X86ScalarSSEf32;
 
 public:
-  explicit X86FastISel(MachineFunction &mf,
-                       DenseMap<const Value *, unsigned> &vm,
-                       DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
-                       DenseMap<const AllocaInst *, int> &am,
-                       std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
-                       , SmallSet<const Instruction *, 8> &cil
-#endif
-                       )
-    : FastISel(mf, vm, bm, am, pn
-#ifndef NDEBUG
-               , cil
-#endif
-               ) {
+  explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
     Subtarget = &TM.getSubtarget<X86Subtarget>();
     StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
     X86ScalarSSEf64 = Subtarget->hasSSE2();
@@ -96,6 +85,8 @@ private:
 
   bool X86SelectStore(const Instruction *I);
 
+  bool X86SelectRet(const Instruction *I);
+
   bool X86SelectCmp(const Instruction *I);
 
   bool X86SelectZExt(const Instruction *I);
@@ -117,6 +108,7 @@ private:
   bool X86SelectCall(const Instruction *I);
 
   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
+  CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false);
 
   const X86InstrInfo *getInstrInfo() const {
     return getTargetMachine()->getInstrInfo();
@@ -190,6 +182,20 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
   return CC_X86_32_C;
 }
 
+/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling
+/// convention.
+CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC,
+                                          bool isTailCall) {
+  if (Subtarget->is64Bit()) {
+    if (Subtarget->isTargetWin64())
+      return RetCC_X86_Win64_C;
+    else
+      return RetCC_X86_64_C;
+  }
+
+  return RetCC_X86_32_C;
+}
+
 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
 /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
 /// Return true and the result register by reference if it is possible.
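
For context, the CCAssignFn returned by CCAssignFnForRet is normally handed
to a CCState, which walks the return values and assigns each one a register
or stack location. Roughly (a sketch only; the CCState constructor signature
varies across LLVM revisions):

  SmallVector<CCValAssign, 16> ValLocs;
  CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC));
  // Each CCValAssign now records where one return value lives,
  // e.g. EAX/RAX for an integer or XMM0 for a float.
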
@@ -242,7 +248,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, } ResultReg = createResultReg(RC); - addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), ResultReg), AM); return true; } @@ -261,7 +268,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, case MVT::i1: { // Mask out all but lowest bit. unsigned AndResult = createResultReg(X86::GR8RegisterClass); - BuildMI(MBB, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1); Val = AndResult; } @@ -278,7 +285,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, break; } - addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc)), AM).addReg(Val); return true; } @@ -306,7 +314,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, } if (Opc) { - addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc)), AM) .addImm(Signed ? (uint64_t) CI->getSExtValue() : CI->getZExtValue()); return true; @@ -342,6 +351,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { const User *U = NULL; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast<Instruction>(V)) { + // Don't walk into other basic blocks; it's possible we haven't + // visited them yet, so the instructions may not yet be assigned + // virtual registers. + if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) + return false; + Opcode = I->getOpcode(); U = I; } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { @@ -349,6 +364,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { U = C; } + if (const PointerType *Ty = dyn_cast<PointerType>(V->getType())) + if (Ty->getAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + switch (Opcode) { default: break; case Instruction::BitCast: @@ -370,8 +391,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { case Instruction::Alloca: { // Do static allocas. const AllocaInst *A = cast<AllocaInst>(V); - DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A); - if (SI != StaticAllocaMap.end()) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(A); + if (SI != FuncInfo.StaticAllocaMap.end()) { AM.BaseType = X86AddressMode::FrameIndexBase; AM.Base.FrameIndex = SI->second; return true; @@ -411,20 +433,33 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { Disp += SL->getElementOffset(Idx); } else { uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { - // Constant-offset addressing. - Disp += CI->getSExtValue() * S; - } else if (IndexReg == 0 && - (!AM.GV || !Subtarget->isPICStyleRIPRel()) && - (S == 1 || S == 2 || S == 4 || S == 8)) { - // Scaled-index addressing. - Scale = S; - IndexReg = getRegForGEPIndex(Op).first; - if (IndexReg == 0) - return false; - } else - // Unsupported. - goto unsupported_gep; + SmallVector<const Value *, 4> Worklist; + Worklist.push_back(Op); + do { + Op = Worklist.pop_back_val(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + // Constant-offset addressing. 
+ Disp += CI->getSExtValue() * S; + } else if (isa<AddOperator>(Op) && + isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) { + // An add with a constant operand. Fold the constant. + ConstantInt *CI = + cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + Disp += CI->getSExtValue() * S; + // Add the other operand back to the work list. + Worklist.push_back(cast<AddOperator>(Op)->getOperand(0)); + } else if (IndexReg == 0 && + (!AM.GV || !Subtarget->isPICStyleRIPRel()) && + (S == 1 || S == 2 || S == 4 || S == 8)) { + // Scaled-index addressing. + Scale = S; + IndexReg = getRegForGEPIndex(Op).first; + if (IndexReg == 0) + return false; + } else + // Unsupported. + goto unsupported_gep; + } while (!Worklist.empty()); } } // Check for displacement overflow. @@ -473,7 +508,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // If this reference is relative to the pic base, set it now. if (isGlobalRelativeToPICBase(GVFlags)) { // FIXME: How do we know Base.Reg is free?? - AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF); + AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } // Unless the ABI requires an extra load, return a direct reference to @@ -504,6 +539,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { StubAM.GV = GV; StubAM.GVOpFlags = GVFlags; + // Prepare for inserting code in the local-value area. + MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; @@ -516,8 +554,13 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { } LoadReg = createResultReg(RC); - addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM); - + MachineInstrBuilder LoadMI = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); + addFullAddress(LoadMI, StubAM); + + // Ok, back to normal mode. + leaveLocalValueArea(SaveInsertPt); + // Prevent loading GV stub multiple times in same MBB. LocalValueMap[V] = LoadReg; } @@ -642,6 +685,93 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { return X86FastEmitStore(VT, I->getOperand(0), AM); } +/// X86SelectRet - Select and emit code to implement ret instructions. +bool X86FastISel::X86SelectRet(const Instruction *I) { + const ReturnInst *Ret = cast<ReturnInst>(I); + const Function &F = *I->getParent()->getParent(); + + if (!FuncInfo.CanLowerReturn) + return false; + + CallingConv::ID CC = F.getCallingConv(); + if (CC != CallingConv::C && + CC != CallingConv::Fast && + CC != CallingConv::X86_FastCall) + return false; + + if (Subtarget->isTargetWin64()) + return false; + + // Don't handle popping bytes on return for now. + if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>() + ->getBytesToPopOnReturn() != 0) + return 0; + + // fastcc with -tailcallopt is intended to provide a guaranteed + // tail call optimization. Fastisel doesn't know how to do that. + if (CC == CallingConv::Fast && GuaranteedTailCallOpt) + return false; + + // Let SDISel handle vararg functions. + if (F.isVarArg()) + return false; + + if (Ret->getNumOperands() > 0) { + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + + // Analyze operands of the call, assigning locations to each operand. 
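[Illustrative note, not part of the patch: AnalyzeReturn below consults the RetCC_X86 tables from X86CallingConv.td shown earlier; e.g. a plain i32 return under the C convention yields a single CCValAssign with isRegLoc() true and getLocReg() == X86::EAX.]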
+ SmallVector<CCValAssign, 16> ValLocs; + CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext()); + CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC)); + + const Value *RV = Ret->getOperand(0); + unsigned Reg = getRegForValue(RV); + if (Reg == 0) + return false; + + // Only handle a single return value for now. + if (ValLocs.size() != 1) + return false; + + CCValAssign &VA = ValLocs[0]; + + // Don't bother handling odd stuff for now. + if (VA.getLocInfo() != CCValAssign::Full) + return false; + // Only handle register returns for now. + if (!VA.isRegLoc()) + return false; + // TODO: For now, don't try to handle cases where getLocInfo() + // says Full but the types don't match. + if (VA.getValVT() != TLI.getValueType(RV->getType())) + return false; + + // The calling-convention tables for x87 returns don't tell + // the whole story. + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) + return false; + + // Make the copy. + unsigned SrcReg = Reg + VA.getValNo(); + unsigned DstReg = VA.getLocReg(); + const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); + // Avoid a cross-class copy. This is very unlikely. + if (!SrcRC->contains(DstReg)) + return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + DstReg).addReg(SrcReg); + + // Mark the register as live out of the function. + MRI.addLiveOut(VA.getLocReg()); + } + + // Now emit the RET. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); + return true; +} + /// X86SelectLoad - Select and emit code to implement load instructions. /// bool X86FastISel::X86SelectLoad(const Instruction *I) { @@ -661,15 +791,15 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) { return false; } -static unsigned X86ChooseCmpOpcode(EVT VT) { +static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { switch (VT.getSimpleVT().SimpleTy) { default: return 0; case MVT::i8: return X86::CMP8rr; case MVT::i16: return X86::CMP16rr; case MVT::i32: return X86::CMP32rr; case MVT::i64: return X86::CMP64rr; - case MVT::f32: return X86::UCOMISSrr; - case MVT::f64: return X86::UCOMISDrr; + case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0; + case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0; } } @@ -706,18 +836,21 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, // CMPri, otherwise use CMPrr. 
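[Illustrative note, not part of the patch: with the Subtarget parameter added to X86ChooseCmpOpcode above, f32/f64 compares now yield opcode 0 on pre-SSE hardware, e.g. X86ChooseCmpOpcode(MVT::f64, Subtarget) == 0 without SSE2, so X86FastEmitCompare returns false and the compare is left for the full SelectionDAG path.]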
if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { - BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg) - .addImm(Op1C->getSExtValue()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc)) + .addReg(Op0Reg) + .addImm(Op1C->getSExtValue()); return true; } } - unsigned CompareOpc = X86ChooseCmpOpcode(VT); + unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget); if (CompareOpc == 0) return false; unsigned Op1Reg = getRegForValue(Op1); if (Op1Reg == 0) return false; - BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc)) + .addReg(Op0Reg) + .addReg(Op1Reg); return true; } @@ -739,9 +872,10 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { unsigned EReg = createResultReg(&X86::GR8RegClass); unsigned NPReg = createResultReg(&X86::GR8RegClass); - BuildMI(MBB, DL, TII.get(X86::SETEr), EReg); - BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg); - BuildMI(MBB, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::SETNPr), NPReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg); UpdateValueMap(I, ResultReg); return true; @@ -752,9 +886,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { unsigned NEReg = createResultReg(&X86::GR8RegClass); unsigned PReg = createResultReg(&X86::GR8RegClass); - BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg); - BuildMI(MBB, DL, TII.get(X86::SETPr), PReg); - BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::SETNEr), NEReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::SETPr), PReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::OR8rr), ResultReg) + .addReg(PReg).addReg(NEReg); UpdateValueMap(I, ResultReg); return true; } @@ -793,7 +931,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { if (!X86FastEmitCompare(Op0, Op1, VT)) return false; - BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg); UpdateValueMap(I, ResultReg); return true; } @@ -819,8 +957,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // Unconditional branches are selected by tablegen-generated code. // Handle a conditional branch. const BranchInst *BI = cast<BranchInst>(I); - MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)]; - MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)]; + MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; + MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; // Fold the common case of a conditional branch with a comparison. if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { @@ -829,7 +967,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // Try to take advantage of fallthrough opportunities. 
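[Illustrative note, not part of the patch: the swap below can turn an ordered predicate into an unordered one, e.g. CmpInst::getInversePredicate(FCMP_OEQ) == FCMP_UNE; that is why the UNE path later emits a second JP_4 branch to catch the NaN case.]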
CmpInst::Predicate Predicate = CI->getPredicate(); - if (MBB->isLayoutSuccessor(TrueMBB)) { + if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::getInversePredicate(Predicate); } @@ -878,16 +1016,18 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { if (!X86FastEmitCompare(Op0, Op1, VT)) return false; - BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc)) + .addMBB(TrueMBB); if (Predicate == CmpInst::FCMP_UNE) { // X86 requires a second branch to handle UNE (and OEQ, // which is mapped to UNE above). - BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4)) + .addMBB(TrueMBB); } - FastEmitBranch(FalseMBB); - MBB->addSuccessor(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); return true; } } else if (ExtractValueInst *EI = @@ -910,10 +1050,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow || CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) { const MachineInstr *SetMI = 0; - unsigned Reg = lookUpRegForValue(EI); + unsigned Reg = getRegForValue(EI); for (MachineBasicBlock::const_reverse_iterator - RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) { + RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend(); + RI != RE; ++RI) { const MachineInstr &MI = *RI; if (MI.definesRegister(Reg)) { @@ -938,11 +1079,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { unsigned OpCode = SetMI->getOpcode(); if (OpCode == X86::SETOr || OpCode == X86::SETBr) { - BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? - X86::JO_4 : X86::JB_4)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4)) .addMBB(TrueMBB); - FastEmitBranch(FalseMBB); - MBB->addSuccessor(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); return true; } } @@ -954,10 +1095,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { unsigned OpReg = getRegForValue(BI->getCondition()); if (OpReg == 0) return false; - BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); - BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); - FastEmitBranch(FalseMBB); - MBB->addSuccessor(TrueMBB); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) + .addReg(OpReg).addReg(OpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4)) + .addMBB(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); return true; } @@ -1014,7 +1157,7 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { // Fold immediate in shl(x,3). if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { unsigned ResultReg = createResultReg(RC); - BuildMI(MBB, DL, TII.get(OpImm), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); UpdateValueMap(I, ResultReg); return true; @@ -1022,17 +1165,19 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; - TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + CReg).addReg(Op1Reg); // The shift instruction uses X86::CL. 
If we defined a super-register
- // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
- // we're doing here.
+ // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
if (CReg != X86::CL)
- BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
- .addReg(CReg).addImm(X86::sub_8bit);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
unsigned ResultReg = createResultReg(RC);
- BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
+ .addReg(Op0Reg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1064,9 +1209,11 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
unsigned Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
- BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
+ .addReg(Op0Reg).addReg(Op0Reg);
unsigned ResultReg = createResultReg(RC);
- BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1080,7 +1227,9 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
- BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSS2SDrr), ResultReg)
+ .addReg(OpReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1097,7 +1246,9 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
- BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSD2SSrr), ResultReg)
+ .addReg(OpReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1132,7 +1283,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg)
+ .addReg(InputReg);
// Then issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
@@ -1153,14 +1305,18 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
switch (CI->getIntrinsicID()) {
default: break;
case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
// Cheat a little. We know that the registers for "add" and "seto" are
// allocated sequentially. However, we only keep track of the register
// for "add" in the value map. Use extractvalue's index to get the
// correct register for "seto".
- UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
+ unsigned OpReg = getRegForValue(Agg);
+ if (OpReg == 0)
+ return false;
+ UpdateValueMap(I, OpReg + *EI->idx_begin());
return true;
}
+ }
}
return false;
@@ -1174,8 +1330,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Emit inline code to store the stack guard onto the stack. 
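[Illustrative note, not part of the patch: the hunk below reflects the CallInst accessor change of this LLVM cycle; intrinsic arguments are now reached as I.getArgOperand(N) instead of I.getOperand(N+1), e.g. the guard value moves from I.getOperand(1) to I.getArgOperand(0).]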
EVT PtrTy = TLI.getPointerTy(); - const Value *Op1 = I.getOperand(1); // The guard's value. - const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + const Value *Op1 = I.getArgOperand(0); // The guard's value. + const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); // Grab the frame index. X86AddressMode AM; @@ -1186,7 +1342,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return true; } case Intrinsic::objectsize: { - ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2)); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); const Type *Ty = I.getCalledFunction()->getReturnType(); assert(CI && "Non-constant type in Intrinsic::objectsize?"); @@ -1204,8 +1360,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(OpC), ResultReg). - addImm(CI->getZExtValue() == 0 ? -1ULL : 0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg). + addImm(CI->isZero() ? -1ULL : 0); UpdateValueMap(&I, ResultReg); return true; } @@ -1218,12 +1374,12 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. - addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). - addMetadata(DI->getVariable()); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM). + addImm(0).addMetadata(DI->getVariable()); return true; } case Intrinsic::trap: { - BuildMI(MBB, DL, TII.get(X86::TRAP)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP)); return true; } case Intrinsic::sadd_with_overflow: @@ -1241,8 +1397,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { if (!isTypeLegal(RetTy, VT)) return false; - const Value *Op1 = I.getOperand(1); - const Value *Op2 = I.getOperand(2); + const Value *Op1 = I.getArgOperand(0); + const Value *Op2 = I.getArgOperand(1); unsigned Reg1 = getRegForValue(Op1); unsigned Reg2 = getRegForValue(Op2); @@ -1259,7 +1415,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg) + .addReg(Reg1).addReg(Reg2); unsigned DestReg1 = UpdateValueMap(&I, ResultReg); // If the add with overflow is an intra-block value then we just want to @@ -1277,7 +1434,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { unsigned Opc = X86::SETBr; if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) Opc = X86::SETOr; - BuildMI(MBB, DL, TII.get(Opc), ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); return true; } } @@ -1285,7 +1442,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { bool X86FastISel::X86SelectCall(const Instruction *I) { const CallInst *CI = cast<CallInst>(I); - const Value *Callee = I->getOperand(0); + const Value *Callee = CI->getCalledValue(); // Can't handle inline asm yet. if (isa<InlineAsm>(Callee)) @@ -1314,6 +1471,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (FTy->isVarArg()) return false; + // Fast-isel doesn't know about callee-pop yet. + if (Subtarget->IsCalleePop(FTy->isVarArg(), CC)) + return false; + // Handle *simple* calls for now. 
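[Illustrative note, not part of the patch: IsCalleePop above is true for conventions where the callee removes its own arguments with "ret imm16", e.g. non-vararg X86_StdCall and X86_FastCall on x86-32; fast-isel cannot model that stack adjustment yet, so such calls fall back to SelectionDAG.]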
const Type *RetTy = CS.getType(); EVT RetVT; @@ -1387,6 +1548,12 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); + + // Allocate shadow area for Win64 + if (Subtarget->isTargetWin64()) { + CCInfo.AllocateStack(32, 8); + } + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); // Get a count of how many bytes are to be pushed on the stack. @@ -1394,7 +1561,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Issue CALLSEQ_START unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); - BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) + .addImm(NumBytes); // Process argument: walk the register/memloc assignments, inserting // copies / loads. @@ -1449,11 +1617,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } if (VA.isRegLoc()) { - TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(), - Arg, RC, RC, DL); - assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; - Emitted = true; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + VA.getLocReg()).addReg(Arg); RegArgs.push_back(VA.getLocReg()); } else { unsigned LocMemOffset = VA.getLocMemOffset(); @@ -1475,12 +1640,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (Subtarget->isPICStyleGOT()) { - TargetRegisterClass *RC = X86::GR32RegisterClass; - unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC, - DL); - assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; - Emitted = true; + unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + X86::EBX).addReg(Base); } // Issue the call. @@ -1488,7 +1650,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (CalleeOp) { // Register-indirect call. unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r; - MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) + .addReg(CalleeOp); } else { // Direct call. @@ -1517,7 +1680,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } - MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) + .addGlobalAddress(GV, 0, OpFlags); } // Add an implicit use GOT pointer in EBX. @@ -1530,9 +1694,11 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Issue CALLSEQ_END unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); - BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) + .addImm(NumBytes).addImm(0); // Now handle call return value (if any). 
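[Illustrative note, not part of the patch: UsedRegs, introduced below, collects every physical register the return value is actually copied out of; the setPhysRegsDeadExcept call at the end of X86SelectCall then marks every other implicit def of the CALL instruction dead, so the fast register allocator does not keep stale values alive in EAX/EDX/XMM0/... across the call site.]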
+ SmallVector<unsigned, 4> UsedRegs; if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext()); @@ -1542,7 +1708,6 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { assert(RVLocs.size() == 1 && "Can't handle multi-value calls!"); EVT CopyVT = RVLocs[0].getValVT(); TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); - TargetRegisterClass *SrcRC = DstRC; // If this is a call to a function that returns an fp value on the x87 fp // stack, but where we prefer to use the value in xmm registers, copy it @@ -1551,15 +1716,14 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { RVLocs[0].getLocReg() == X86::ST1) && isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) { CopyVT = MVT::f80; - SrcRC = X86::RSTRegisterClass; DstRC = X86::RFP80RegisterClass; } unsigned ResultReg = createResultReg(DstRC); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - RVLocs[0].getLocReg(), DstRC, SrcRC, DL); - assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; - Emitted = true; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(RVLocs[0].getLocReg()); + UsedRegs.push_back(RVLocs[0].getLocReg()); + if (CopyVT != RVLocs[0].getValVT()) { // Round the F80 the right size, which also moves to the appropriate xmm // register. This is accomplished by storing the F80 value in memory and @@ -1568,18 +1732,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; unsigned MemSize = ResVT.getSizeInBits()/8; int FI = MFI.CreateStackObject(MemSize, MemSize, false); - addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg); + addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc)), FI) + .addReg(ResultReg); DstRC = ResVT == MVT::f32 ? X86::FR32RegisterClass : X86::FR64RegisterClass; Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm; ResultReg = createResultReg(DstRC); - addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI); + addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), FI); } if (AndToI1) { // Mask out all but lowest bit for some call which produces an i1. unsigned AndResult = createResultReg(X86::GR8RegisterClass); - BuildMI(MBB, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1); ResultReg = AndResult; } @@ -1587,6 +1754,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { UpdateValueMap(I, ResultReg); } + // Set all unused physreg defs as dead. 
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + return true; } @@ -1599,6 +1769,8 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { return X86SelectLoad(I); case Instruction::Store: return X86SelectStore(I); + case Instruction::Ret: + return X86SelectRet(I); case Instruction::ICmp: case Instruction::FCmp: return X86SelectCmp(I); @@ -1699,7 +1871,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { else Opc = X86::LEA64r; unsigned ResultReg = createResultReg(RC); - addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), AM); return ResultReg; } return 0; @@ -1717,10 +1890,10 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { unsigned char OpFlag = 0; if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic OpFlag = X86II::MO_PIC_BASE_OFFSET; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } else if (Subtarget->isPICStyleGOT()) { OpFlag = X86II::MO_GOTOFF; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } else if (Subtarget->isPICStyleRIPRel() && TM.getCodeModel() == CodeModel::Small) { PICBase = X86::RIP; @@ -1729,7 +1902,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { // Create the load from the constant pool. unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align); unsigned ResultReg = createResultReg(RC); - addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), + addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), MCPOffset, PICBase, OpFlag); return ResultReg; @@ -1743,7 +1917,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { // various places, but TargetMaterializeAlloca also needs a check // in order to avoid recursion between getRegForValue, // X86SelectAddrss, and TargetMaterializeAlloca. - if (!StaticAllocaMap.count(C)) + if (!FuncInfo.StaticAllocaMap.count(C)) return 0; X86AddressMode AM; @@ -1752,24 +1926,13 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); unsigned ResultReg = createResultReg(RC); - addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), AM); return ResultReg; } namespace llvm { - llvm::FastISel *X86::createFastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, - DenseMap<const AllocaInst *, int> &am, - std::vector<std::pair<MachineInstr*, unsigned> > &pn -#ifndef NDEBUG - , SmallSet<const Instruction *, 8> &cil -#endif - ) { - return new X86FastISel(mf, vm, bm, am, pn -#ifndef NDEBUG - , cil -#endif - ); + llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { + return new X86FastISel(funcInfo); } } diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h index a8117d47bf66..96e0aaec580b 100644 --- a/lib/Target/X86/X86FixupKinds.h +++ b/lib/Target/X86/X86FixupKinds.h @@ -17,6 +17,7 @@ namespace X86 { enum Fixups { reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 + reloc_pcrel_2byte, // 16-bit pcrel, e.g. 
callw
reloc_riprel_4byte, // 32-bit rip-relative
reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq
};
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 93460ef308cc..cee4ad70201a 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -133,7 +133,7 @@ namespace {
// Emit an fxch to update the runtime processor's version of the state.
BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
- NumFXCH++;
+ ++NumFXCH;
}
void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
@@ -164,6 +164,8 @@ namespace {
void handleCompareFP(MachineBasicBlock::iterator &I);
void handleCondMovFP(MachineBasicBlock::iterator &I);
void handleSpecialFP(MachineBasicBlock::iterator &I);
+
+ bool translateCopy(MachineInstr*);
};
char FPS::ID = 0;
}
@@ -232,12 +234,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
- unsigned Flags = MI->getDesc().TSFlags;
+ uint64_t Flags = MI->getDesc().TSFlags;
unsigned FPInstClass = Flags & X86II::FPTypeMask;
if (MI->isInlineAsm())
FPInstClass = X86II::SpecialFP;
-
+
+ if (MI->isCopy() && translateCopy(MI))
+ FPInstClass = X86II::SpecialFP;
+
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
@@ -628,7 +633,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MachineInstr *MI = I;
unsigned NumOps = MI->getDesc().getNumOperands();
- assert((NumOps == X86AddrNumOperands + 1 || NumOps == 1) &&
+ assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
"Can only handle fst* & ftst instructions!");
// Is this the last use of the source register?
@@ -1001,15 +1006,17 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
case X86::FpSET_ST0_32:
case X86::FpSET_ST0_64:
case X86::FpSET_ST0_80: {
+ // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm
+ // arguments that use an st constraint. We expect a sequence of
+ // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM
unsigned Op0 = getFPReg(MI->getOperand(0));
- // FpSET_ST0_80 is generated by copyRegToReg for both function return
- // and inline assembly with the "st" constrain. In the latter case,
- // it is possible for ST(0) to be alive after this instruction.
if (!MI->killsRegister(X86::FP0 + Op0)) {
- // Duplicate Op0
- duplicateToTop(0, 7 /*temp register*/, I);
+ // Duplicate Op0 into a temporary on the stack top.
+ // This actually assumes that FP7 is dead.
+ duplicateToTop(Op0, 7, I);
} else {
+ // Op0 is killed, so just swap it into position.
moveToTop(Op0, I);
}
--StackTop; // "Forget" we have something on the top of stack!
@@ -1017,17 +1024,29 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
case X86::FpSET_ST1_32:
case X86::FpSET_ST1_64:
- case X86::FpSET_ST1_80:
- // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
- if (StackTop == 1) {
- BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
- NumFXCH++;
- StackTop = 0;
- break;
+ case X86::FpSET_ST1_80: {
+ // Set up st(1) for inline asm. We are assuming that st(0) has already been
+ // set up by FpSET_ST0, and our StackTop is off by one because of it.
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ // Restore the actual StackTop from before Fp_SET_ST0.
+ // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
+ // are not enforcing the constraint. 
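[Illustrative worked example, not part of the patch: for inline asm consuming two st-constrained operands, the expected sequence is FpSET_ST0 x; FpSET_ST1 y; INLINEASM. FpSET_ST0 already decremented StackTop, so the ++StackTop below restores the true stack depth before st(1) is arranged, and the final "StackTop -= 2" forgets both st(0) and st(1).]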
+ ++StackTop; + unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). + if (!MI->killsRegister(X86::FP0 + Op0)) { + // Assume FP6 is not live, use it as a scratch register. + duplicateToTop(Op0, 6, I); + moveToTop(RegOnTop, I); + } else if (getSTReg(Op0) != X86::ST1) { + // We have the wrong value at st(1). Shuffle! Untested! + moveToTop(getStackEntry(1), I); + moveToTop(Op0, I); + moveToTop(RegOnTop, I); } - assert(StackTop == 2 && "Stack should have two element on it to return!"); - --StackTop; // "Forget" we have something on the top of stack! + assert(StackTop >= 2 && "Too few live registers"); + StackTop -= 2; // "Forget" both st(0) and st(1). break; + } case X86::MOV_Fp3232: case X86::MOV_Fp3264: case X86::MOV_Fp6432: @@ -1041,32 +1060,6 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { unsigned SrcReg = getFPReg(MO1); const MachineOperand &MO0 = MI->getOperand(0); - // These can be created due to inline asm. Two address pass can introduce - // copies from RFP registers to virtual registers. - if (MO0.getReg() == X86::ST0 && SrcReg == 0) { - assert(MO1.isKill()); - // Treat %ST0<def> = MOV_Fp8080 %FP0<kill> - // like FpSET_ST0_80 %FP0<kill>, %ST0<imp-def> - assert((StackTop == 1 || StackTop == 2) - && "Stack should have one or two element on it to return!"); - --StackTop; // "Forget" we have something on the top of stack! - break; - } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) { - assert(MO1.isKill()); - // Treat %ST1<def> = MOV_Fp8080 %FP1<kill> - // like FpSET_ST1_80 %FP0<kill>, %ST1<imp-def> - // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them. - if (StackTop == 1) { - BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1); - NumFXCH++; - StackTop = 0; - break; - } - assert(StackTop == 2 && "Stack should have two element on it to return!"); - --StackTop; // "Forget" we have something on the top of stack! - break; - } - unsigned DestReg = getFPReg(MO0); if (MI->killsRegister(X86::FP0+SrcReg)) { // If the input operand is killed, we can just change the owner of the @@ -1206,3 +1199,33 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { I = MBB->erase(I); // Remove the pseudo instruction --I; } + +// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. +bool FPS::translateCopy(MachineInstr *MI) { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + if (DstReg == X86::ST0) { + MI->setDesc(TII->get(X86::FpSET_ST0_80)); + MI->RemoveOperand(0); + return true; + } + if (DstReg == X86::ST1) { + MI->setDesc(TII->get(X86::FpSET_ST1_80)); + MI->RemoveOperand(0); + return true; + } + if (SrcReg == X86::ST0) { + MI->setDesc(TII->get(X86::FpGET_ST0_80)); + return true; + } + if (SrcReg == X86::ST1) { + MI->setDesc(TII->get(X86::FpGET_ST1_80)); + return true; + } + if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { + MI->setDesc(TII->get(X86::MOV_Fp8080)); + return true; + } + return false; +} diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 747683dcc412..2c98b96c510b 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -72,18 +72,15 @@ static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) { /// stack code, and thus needs an FP_REG_KILL. static bool ContainsFPStackCode(MachineBasicBlock *MBB, const MachineRegisterInfo &MRI) { - // Scan the block, looking for instructions that define fp stack vregs. 
+ // Scan the block, looking for instructions that define or use fp stack vregs. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - if (I->getNumOperands() == 0 || !I->getOperand(0).isReg()) - continue; - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef()) + if (!I->getOperand(op).isReg()) continue; - - if (isFPStackVReg(I->getOperand(op).getReg(), MRI)) - return true; + if (unsigned Reg = I->getOperand(op).getReg()) + if (isFPStackVReg(Reg, MRI)) + return true; } } @@ -108,8 +105,8 @@ static bool ContainsFPStackCode(MachineBasicBlock *MBB, bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { // If we are emitting FP stack code, scan the basic block to determine if this - // block defines any FP values. If so, put an FP_REG_KILL instruction before - // the terminator of the block. + // block defines or uses any FP values. If so, put an FP_REG_KILL instruction + // before the terminator of the block. // Note that FP stack instructions are used in all modes for long double, // so we always need to do this check. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 0f64383b0b72..72f2bc11d7cc 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -137,21 +137,6 @@ namespace { } namespace { - class X86ISelListener : public SelectionDAG::DAGUpdateListener { - SmallSet<SDNode*, 4> Deletes; - public: - explicit X86ISelListener() {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { - Deletes.insert(N); - } - virtual void NodeUpdated(SDNode *N) { - // Ignore updates. - } - bool IsDeleted(SDNode *N) { - return Deletes.count(N); - } - }; - //===--------------------------------------------------------------------===// /// ISel - X86 specific code to select X86 machine instructions for /// SelectionDAG operations. @@ -199,16 +184,17 @@ namespace { bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, - X86ISelListener &DeadNodes, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Scale, SDValue &Index, SDValue &Disp); + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Scale, SDValue &Index, SDValue &Disp); + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); bool SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -239,7 +225,8 @@ namespace { // These are 32-bit even in 64-bit mode since RIP relative offset // is 32-bit. 
if (AM.GV) - Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp, + Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(), + MVT::i32, AM.Disp, AM.SymbolFlags); else if (AM.CP) Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, @@ -386,14 +373,14 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, } for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i) Ops.push_back(OrigChain.getOperand(i)); - CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size()); - CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), + CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); Ops.clear(); Ops.push_back(SDValue(Load.getNode(), 1)); for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) Ops.push_back(Call.getOperand(i)); - CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size()); } /// isCalleeLoad - Return true if call address is a load and it can be @@ -515,7 +502,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { N->getOperand(0), MemTmp, NULL, 0, MemVT, false, false, 0); - SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, + SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp, NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the @@ -664,8 +651,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { /// returning true if it cannot be done. This just pattern matches for the /// addressing mode. bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { - X86ISelListener DeadNodes; - if (MatchAddressRecursively(N, AM, DeadNodes, 0)) + if (MatchAddressRecursively(N, AM, 0)) return true; // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has @@ -713,7 +699,6 @@ static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) { } bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, - X86ISelListener &DeadNodes, unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); @@ -876,13 +861,13 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // other uses, since it avoids a two-address sub instruction, however // it costs an additional mov if the index register has other uses. + // Add an artificial use to this node so that we can keep track of + // it if it gets CSE'd with a different node. + HandleSDNode Handle(N); + // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, - DeadNodes, Depth+1) || - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - DeadNodes.IsDeleted(N.getNode())) { + if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { AM = Backup; break; } @@ -893,7 +878,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, } int Cost = 0; - SDValue RHS = N.getNode()->getOperand(1); + SDValue RHS = Handle.getValue().getNode()->getOperand(1); // If the RHS involves a register with multiple uses, this // transformation incurs an extra mov, due to the neg instruction // clobbering its operand. 
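Illustrative aside, not part of the patch: the ISD::SUB hunk above and the ISD::ADD hunk below share one idiom, which is what made the X86ISelListener machinery removable. A HandleSDNode gives N an artificial use, so if the recursive match CSEs or replaces N, the handle tracks the replacement instead of leaving a dangling SDValue. A sketch, simplified from the matcher code:

    HandleSDNode Handle(N);          // keep N alive and trackable
    X86ISelAddressMode Backup = AM;
    if (MatchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
      AM = Backup;                   // match failed: roll the mode back
    } else {
      // Re-read operands through the handle; N itself may have been RAUW'd.
      SDValue RHS = Handle.getValue().getNode()->getOperand(1);
      (void)RHS;
    }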
@@ -944,35 +929,27 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, } case ISD::ADD: { + // Add an artificial use to this node so that we can keep track of + // it if it gets CSE'd with a different node. + HandleSDNode Handle(N); + SDValue LHS = Handle.getValue().getNode()->getOperand(0); + SDValue RHS = Handle.getValue().getNode()->getOperand(1); + X86ISelAddressMode Backup = AM; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, - DeadNodes, Depth+1)) { - if (DeadNodes.IsDeleted(N.getNode())) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return true; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, - DeadNodes, Depth+1)) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return DeadNodes.IsDeleted(N.getNode()); - } + if (!MatchAddressRecursively(LHS, AM, Depth+1) && + !MatchAddressRecursively(RHS, AM, Depth+1)) + return false; + AM = Backup; + LHS = Handle.getValue().getNode()->getOperand(0); + RHS = Handle.getValue().getNode()->getOperand(1); // Try again after commuting the operands. + if (!MatchAddressRecursively(RHS, AM, Depth+1) && + !MatchAddressRecursively(LHS, AM, Depth+1)) + return false; AM = Backup; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, - DeadNodes, Depth+1)) { - if (DeadNodes.IsDeleted(N.getNode())) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return true; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, - DeadNodes, Depth+1)) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return DeadNodes.IsDeleted(N.getNode()); - } - AM = Backup; + LHS = Handle.getValue().getNode()->getOperand(0); + RHS = Handle.getValue().getNode()->getOperand(1); // If we couldn't fold both operands into the address at the same time, // see if we can just put each operand into a register and fold at least @@ -980,8 +957,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (AM.BaseType == X86ISelAddressMode::RegBase && !AM.Base_Reg.getNode() && !AM.IndexReg.getNode()) { - AM.Base_Reg = N.getNode()->getOperand(0); - AM.IndexReg = N.getNode()->getOperand(1); + AM.Base_Reg = LHS; + AM.IndexReg = RHS; AM.Scale = 1; return false; } @@ -996,7 +973,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, uint64_t Offset = CN->getSExtValue(); // Start with the LHS as an addr mode. - if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && + if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && // Address could not have picked a GV address for the displacement. AM.GV == NULL && // On x86-64, the resultant disp must fit in 32-bits. 
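A worked sketch of the constant-offset fold that closes the hunk above (hedged: this condenses the checks in the code, and isInt<> stands in for the usual MathExtras-style helper):

    // Fold C into the displacement only if the result is still encodable:
    // on x86-64 a memory displacement must fit in a signed 32-bit field.
    int64_t NewDisp = AM.Disp + CN->getSExtValue();
    if (!is64Bit || isInt<32>(NewDisp))
      AM.Disp = NewDisp;   // keep the fold
    else
      AM = Backup;         // would overflow disp32: roll back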
@@ -1073,7 +1050,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
CurDAG->RepositionNode(N.getNode(), Shl.getNode());
Shl.getNode()->setNodeId(N.getNode()->getNodeId());
}
- CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes);
+ CurDAG->ReplaceAllUsesWith(N, Shl);
AM.IndexReg = And;
AM.Scale = (1 << ScaleLog);
return false;
@@ -1124,7 +1101,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
}
- CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes);
+ CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
AM.Scale = 1 << ShiftCst;
AM.IndexReg = NewAND;
@@ -1230,7 +1207,8 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp) {
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
X86ISelAddressMode AM;
// Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
@@ -1284,7 +1262,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
if (Complexity <= 2) return false;
- SDValue Segment;
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1292,10 +1269,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
- SDValue &Disp) {
+ SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
-
+
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
@@ -1309,7 +1286,6 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
- SDValue Segment;
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1672,6 +1648,26 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ // Get the low part if needed. Don't use getCopyFromReg for aliasing
+ // registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX down 8 bits.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1682,24 +1678,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits. 
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), 0); - // Then truncate it down to i8. - Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, - MVT::i8, Result); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } @@ -1812,6 +1793,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); } + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + if (HiReg == X86::AH && Subtarget->is64Bit() && + !SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + + // If we also need AL (the quotient), get it by extracting a subreg from + // Result. The fast register allocator does not like multiple CopyFromReg + // nodes using aliasing registers. + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + + // Shift AX right by 8 bits instead of using AH. + Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, + Result, + CurDAG->getTargetConstant(8, MVT::i8)), + 0); + ReplaceUses(SDValue(Node, 1), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + } // Copy the division (low) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1822,25 +1826,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the remainder (high) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result; - if (HiReg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), - 0); - // Then truncate it down to i8. 
- Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, - MVT::i8, Result); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b02c33d3f880..1a634744806e 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -62,21 +62,19 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { - switch (TM.getSubtarget<X86Subtarget>().TargetType) { - default: llvm_unreachable("unknown subtarget type"); - case X86Subtarget::isDarwin: - if (TM.getSubtarget<X86Subtarget>().is64Bit()) - return new X8664_MachoTargetObjectFile(); + + bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); + + if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) { + if (is64Bit) return new X8664_MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); - case X86Subtarget::isELF: - if (TM.getSubtarget<X86Subtarget>().is64Bit()) - return new X8664_ELFTargetObjectFile(TM); + } else if (TM.getSubtarget<X86Subtarget>().isTargetELF() ){ + if (is64Bit) return new X8664_ELFTargetObjectFile(TM); return new X8632_ELFTargetObjectFile(TM); - case X86Subtarget::isMingw: - case X86Subtarget::isCygwin: - case X86Subtarget::isWindows: + } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) { return new TargetLoweringObjectFileCOFF(); - } + } + llvm_unreachable("unknown subtarget type"); } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -347,6 +345,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (!Subtarget->hasSSE2()) setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); + // On X86 and X86-64, atomic operations are lowered to locked instructions. + // Locked instructions, in turn, have implicit fence semantics (all memory + // operations are flushed before issuing the locked instruction, and they + // are not buffered), so we can fold away the common pattern of + // fence-atomic-fence. 
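[Illustrative note, not part of the patch: concretely, for IR of the form "fence; atomicrmw add; fence", the lowered "lock add" already carries full barrier semantics on x86, so with setShouldFoldAtomicFences(true) both fences can be folded away instead of being emitted as separate mfence instructions.]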
+ setShouldFoldAtomicFences(true); // Expand certain atomics setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom); @@ -611,7 +615,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); setOperationAction(ISD::ADD, MVT::v8i8, Legal); @@ -657,14 +661,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v2i32, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v2f32, Promote); - AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v1i64, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); @@ -672,7 +673,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); @@ -691,7 +691,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); } } @@ -792,9 +791,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) EVT VT = SVT; // Do not attempt to promote non-128-bit vectors - if (!VT.is128BitVector()) { + if (!VT.is128BitVector()) continue; - } setOperationAction(ISD::AND, SVT, Promote); AddPromotedToType (ISD::AND, SVT, MVT::v2i64); @@ -825,6 +823,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } if (Subtarget->hasSSE41()) { + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); @@ -965,15 +974,24 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Add/Sub/Mul with overflow operations are custom lowered. 
setOperationAction(ISD::SADDO, MVT::i32, Custom); - setOperationAction(ISD::SADDO, MVT::i64, Custom); setOperationAction(ISD::UADDO, MVT::i32, Custom); - setOperationAction(ISD::UADDO, MVT::i64, Custom); setOperationAction(ISD::SSUBO, MVT::i32, Custom); - setOperationAction(ISD::SSUBO, MVT::i64, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::USUBO, MVT::i64, Custom); setOperationAction(ISD::SMULO, MVT::i32, Custom); - setOperationAction(ISD::SMULO, MVT::i64, Custom); + + // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't + // handle type legalization for these operations here. + // + // FIXME: We really should do custom legalization for addition and + // subtraction on x86-32 once PR3203 is fixed. We really can't do much better + // than generic legalization for 64-bit multiplication-with-overflow, though. + if (Subtarget->is64Bit()) { + setOperationAction(ISD::SADDO, MVT::i64, Custom); + setOperationAction(ISD::UADDO, MVT::i64, Custom); + setOperationAction(ISD::SSUBO, MVT::i64, Custom); + setOperationAction(ISD::USUBO, MVT::i64, Custom); + setOperationAction(ISD::SMULO, MVT::i64, Custom); + } if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. @@ -992,7 +1010,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::MEMBARRIER); setTargetDAGCombine(ISD::ZERO_EXTEND); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -1172,6 +1189,27 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; } +bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, + unsigned &Offset) const { + if (!Subtarget->isTargetLinux()) + return false; + + if (Subtarget->is64Bit()) { + // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs: + Offset = 0x28; + if (getTargetMachine().getCodeModel() == CodeModel::Kernel) + AddressSpace = 256; + else + AddressSpace = 257; + } else { + // %gs:0x14 on i386 + Offset = 0x14; + AddressSpace = 256; + } + return true; +} + + //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1180,19 +1218,19 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { bool X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<EVT> &OutTys, - const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) const { + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), - RVLocs, *DAG.getContext()); - return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86); + RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_X86); } SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); @@ -1220,7 +1258,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; 
assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue ValToCopy = Outs[i].Val; + SDValue ValToCopy = OutVals[i]; // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. @@ -1308,17 +1346,34 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, report_fatal_error("SSE register return with SSE disabled"); } + SDValue Val; + // If this is a call to a function that returns an fp value on the floating - // point stack, but where we prefer to use the value in xmm registers, copy - // it out as F80 and use a truncate to move it from fp stack reg to xmm reg. - if ((VA.getLocReg() == X86::ST0 || - VA.getLocReg() == X86::ST1) && - isScalarFPTypeInSSEReg(VA.getValVT())) { - CopyVT = MVT::f80; - } + // point stack, we must guarantee that the value is popped from the stack, so + // a CopyFromReg is not good enough - the copy instruction may be eliminated + // if the return value is not used. We use the FpGET_ST0 instructions + // instead. + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) { + // If we prefer to use the value in xmm registers, copy it out as f80 and + // use a truncate to move it from fp stack reg to xmm reg. + if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; + bool isST0 = VA.getLocReg() == X86::ST0; + unsigned Opc = 0; + if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32; + if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64; + if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80; + SDValue Ops[] = { Chain, InFlag }; + Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag, + Ops, 2), 1); + Val = Chain.getValue(0); - SDValue Val; - if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) { + // Round the f80 to the right size, which also moves it to the appropriate + // xmm register. + if (CopyVT != VA.getValVT()) + Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, + // This truncation won't change the value. + DAG.getIntPtrConstant(1)); + } else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) { // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64. if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), @@ -1338,15 +1393,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Val = Chain.getValue(0); } InFlag = Chain.getValue(2); - - if (CopyVT != VA.getValVT()) { - // Round the F80 the right size, which also moves to the appropriate xmm - // register. - Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, - // This truncation won't change the value. - DAG.getIntPtrConstant(1)); - } - InVals.push_back(Val); } @@ -1383,29 +1429,6 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) { return Ins[0].Flags.isSRet(); } -/// IsCalleePop - Determines whether the callee is required to pop its -/// own arguments. Callee pop is necessary to support tail calls.
-bool X86TargetLowering::IsCalleePop(bool IsVarArg, - CallingConv::ID CallingConv) const { - if (IsVarArg) - return false; - - switch (CallingConv) { - default: - return false; - case CallingConv::X86_StdCall: - return !Subtarget->is64Bit(); - case CallingConv::X86_FastCall: - return !Subtarget->is64Bit(); - case CallingConv::X86_ThisCall: - return !Subtarget->is64Bit(); - case CallingConv::Fast: - return GuaranteedTailCallOpt; - case CallingConv::GHC: - return GuaranteedTailCallOpt; - } -} - /// CCAssignFnForNode - Selects the correct CCAssignFn for the /// given CallingConvention value. CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { @@ -1483,11 +1506,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // could be overwritten by lowering of arguments in case of a tail call. if (Flags.isByVal()) { int FI = MFI->CreateFixedObject(Flags.getByValSize(), - VA.getLocMemOffset(), isImmutable, false); + VA.getLocMemOffset(), isImmutable); return DAG.getFrameIndex(FI, getPointerTy()); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable, false); + VA.getLocMemOffset(), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0, @@ -1615,8 +1638,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (isVarArg) { if (Is64Bit || (CallConv != CallingConv::X86_FastCall && CallConv != CallingConv::X86_ThisCall)) { - FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, - true, false)); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true)); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1722,7 +1744,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } // Some CCs need callee pop. - if (IsCalleePop(isVarArg, CallConv)) { + if (Subtarget->IsCalleePop(isVarArg, CallConv)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. @@ -1788,7 +1810,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, @@ -1802,6 +1824,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { @@ -1814,7 +1837,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, Ins, DAG); + Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes.
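The callee-pop rule relocated in the hunk above is compact enough to restate on its own. Below is a minimal standalone C++ sketch of the decision the removed X86TargetLowering::IsCalleePop encoded; the CallConv enum and the is64Bit/guaranteedTCO parameters are illustrative stand-ins for the LLVM types, not the in-tree X86Subtarget interface.

    // Sketch of the callee-pop decision shown in the removed method above.
    // Variadic calls are always caller-cleanup; stdcall, fastcall, and
    // thiscall are callee-cleanup only on 32-bit x86; fastcc and GHC pop
    // their own arguments only under guaranteed tail-call optimization,
    // since callee pop is what makes tail calls with stack arguments work.
    #include <iostream>

    enum class CallConv { C, X86_StdCall, X86_FastCall, X86_ThisCall, Fast, GHC };

    bool isCalleePop(bool isVarArg, CallConv CC, bool is64Bit, bool guaranteedTCO) {
      if (isVarArg)
        return false;
      switch (CC) {
      case CallConv::X86_StdCall:
      case CallConv::X86_FastCall:
      case CallConv::X86_ThisCall:
        return !is64Bit;
      case CallConv::Fast:
      case CallConv::GHC:
        return guaranteedTCO;
      default:
        return false;
      }
    }

    int main() {
      std::cout << isCalleePop(false, CallConv::X86_StdCall, false, false)  // 1
                << isCalleePop(false, CallConv::X86_StdCall, true, false)   // 0
                << isCalleePop(true, CallConv::X86_StdCall, false, false)   // 0
                << '\n';
    }

As in the original, the varargs check wins over everything else: a variadic stdcall call is caller-cleanup even on 32-bit, because the callee cannot know how many bytes to pop.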
@@ -1874,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; bool isByVal = Flags.isByVal(); @@ -2013,12 +2036,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) continue; assert(VA.isMemLoc()); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // Create frame index. int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; - FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false); + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) { @@ -2059,7 +2082,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, FPDiff, dl); } - bool WasGlobalOrExternal = false; if (getTargetMachine().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls @@ -2067,7 +2089,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // pc-relative offset may not be large enough to hold the whole // address. } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { - WasGlobalOrExternal = true; // If the callee is a GlobalAddress node (quite common, every direct call // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack // it. @@ -2095,11 +2116,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, OpFlags = X86II::MO_DARWIN_STUB; } - Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(), + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), G->getOffset(), OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { - WasGlobalOrExternal = true; unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external @@ -2153,17 +2173,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(InFlag); if (isTailCall) { - // If this is the first return lowered for this function, add the regs - // to the liveout set for the function. - if (MF.getRegInfo().liveout_empty()) { - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, - *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_X86); - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } + // We used to do: + //// If this is the first return lowered for this function, add the regs + //// to the liveout set for the function. + // This isn't right, although it's probably harmless on x86; liveouts + // should be computed from returns not tail calls. Consider a void + // function making a tail call to a function returning int. return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); } @@ -2173,7 +2188,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the CALLSEQ_END node. 
unsigned NumBytesForCalleeToPush; - if (IsCalleePop(isVarArg, CallConv)) + if (Subtarget->IsCalleePop(isVarArg, CallConv)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet) // If this is a call to a struct-return function, the callee @@ -2314,6 +2329,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { if (!IsTailCallConvention(CalleeCC) && @@ -2332,8 +2348,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; } - // Look for obvious safe cases to perform tail call optimization that does not - // requite ABI changes. This is what gcc calls sibcall. + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. This is what gcc calls sibcall. // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to // emit a special epilogue. @@ -2427,8 +2443,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (VA.getLocInfo() == CCValAssign::Indirect) return false; @@ -2439,26 +2454,32 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } } + + // If the tailcall address may be in a register, then make sure it's + // possible to register allocate for it. In 32-bit, the call address can + // only target EAX, EDX, or ECX since the tail call must be scheduled after + // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI, + // RDI, R8, R9, R11. + if (!isa<GlobalAddressSDNode>(Callee) && + !isa<ExternalSymbolSDNode>(Callee)) { + unsigned Limit = Subtarget->is64Bit() ? 8 : 3; + unsigned NumInRegs = 0; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) { + if (++NumInRegs == Limit) + return false; + } + } + } } return true; } FastISel * -X86TargetLowering::createFastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &vm, - DenseMap<const BasicBlock*, MachineBasicBlock*> &bm, - DenseMap<const AllocaInst *, int> &am, - std::vector<std::pair<MachineInstr*, unsigned> > &pn -#ifndef NDEBUG - , SmallSet<const Instruction *, 8> &cil -#endif - ) const { - return X86::createFastISel(mf, vm, bm, am, pn -#ifndef NDEBUG - , cil -#endif - ); +X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { + return X86::createFastISel(funcInfo); } @@ -2476,7 +2497,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - false, false); + false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -3175,7 +3196,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { /// constant +0.0. 
bool X86::isZeroNode(SDValue Elt) { return ((isa<ConstantSDNode>(Elt) && - cast<ConstantSDNode>(Elt)->getZExtValue() == 0) || + cast<ConstantSDNode>(Elt)->isNullValue()) || (isa<ConstantFPSDNode>(Elt) && cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero())); } @@ -4433,7 +4454,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide -/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be +/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be /// done when every pair / quad of shuffle mask elements point to elements in /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> @@ -4447,7 +4468,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 2 : 4; EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); - EVT MaskEltVT = MaskVT.getVectorElementType(); EVT NewVT = MaskVT; switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unexpected!"); @@ -5059,13 +5079,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - if (Op.getValueType() == MVT::v2f32) - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, - Op.getOperand(0)))); - - if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) + + if (Op.getValueType() == MVT::v1i64 && + Op.getOperand(0).getValueType() == MVT::i64) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); @@ -5230,10 +5246,10 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, if (OpFlags == X86II::MO_NO_FLAG && X86::isOffsetSuitableForCodeModel(Offset, M)) { // A direct static reference to a global. 
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); Offset = 0; } else { - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); } if (Subtarget->isPICStyleRIPRel() && @@ -5278,7 +5294,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); @@ -5351,7 +5367,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); @@ -5366,33 +5383,78 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - // TODO: implement the "local dynamic" model - // TODO: implement the "initial exec"model for pic executables - assert(Subtarget->isTargetELF() && - "TLS not implemented for non-ELF targets"); + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = GA->getGlobal(); - // If GV is an alias then use the aliasee for determining - // thread-localness. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GV = GA->resolveAliasedGlobal(false); - - TLSModel::Model model = getTLSModel(GV, - getTargetMachine().getRelocationModel()); - - switch (model) { - case TLSModel::GeneralDynamic: - case TLSModel::LocalDynamic: // not implemented - if (Subtarget->is64Bit()) - return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); - return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + if (Subtarget->isTargetELF()) { + // TODO: implement the "local dynamic" model + // TODO: implement the "initial exec" model for pic executables + + // If GV is an alias then use the aliasee for determining + // thread-localness. + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + GV = GA->resolveAliasedGlobal(false); + + TLSModel::Model model + = getTLSModel(GV, getTargetMachine().getRelocationModel()); + + switch (model) { + case TLSModel::GeneralDynamic: + case TLSModel::LocalDynamic: // not implemented + if (Subtarget->is64Bit()) + return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); + return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, + Subtarget->is64Bit()); + } + } else if (Subtarget->isTargetDarwin()) { + // Darwin only has one model of TLS. Lower to that. + unsigned char OpFlag = 0; + unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ? + X86ISD::WrapperRIP : X86ISD::Wrapper; + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg.
+ bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) && + !Subtarget->is64Bit(); + if (PIC32) + OpFlag = X86II::MO_TLVP_PIC_BASE; + else + OpFlag = X86II::MO_TLVP; + DebugLoc DL = Op.getDebugLoc(); + SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, + getPointerTy(), + GA->getOffset(), OpFlag); + SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + + // With PIC32, the address is actually $g + Offset. + if (PIC32) + Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc(), getPointerTy()), + Offset); + + // Lowering the machine isd will make sure everything is in the right + // location. + SDValue Args[] = { Offset }; + SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1); + + // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setAdjustsStack(true); - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, - Subtarget->is64Bit()); + // And our return value (tls address) is in the standard call return value + // location. + unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); } + + assert(false && + "TLS not implemented for this target."); llvm_unreachable("Unreachable"); return SDValue(); @@ -5715,7 +5777,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(), FudgePtr, PseudoSourceValue::getConstantPool(), 0, MVT::f32, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. @@ -5964,6 +6026,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, bool NeedCF = false; bool NeedOF = false; switch (X86CC) { + default: break; case X86::COND_A: case X86::COND_AE: case X86::COND_B: case X86::COND_BE: NeedCF = true; @@ -5973,120 +6036,129 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, case X86::COND_O: case X86::COND_NO: NeedOF = true; break; - default: break; } // See if we can use the EFLAGS value from the operand instead of // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. - if (Op.getResNo() == 0 && !NeedOF && !NeedCF) { - unsigned Opcode = 0; - unsigned NumOperands = 0; - switch (Op.getNode()->getOpcode()) { - case ISD::ADD: - // Due to an isel shortcoming, be conservative if this add is - // likely to be selected as part of a load-modify-store - // instruction. When the root node in a match is a store, isel - // doesn't know how to remap non-chain non-flag uses of other - // nodes in the match, such as the ADD in this case. This leads - // to the ADD being left around and reselected, with the result - // being two adds in the output. Alas, even if none our users - // are stores, that doesn't prove we're O.K. Ergo, if we have - // any parents that aren't CopyToReg or SETCC, eschew INC/DEC. - // A better fix seems to require climbing the DAG back to the - // root, and it doesn't seem to be worth the effort. 
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) - goto default_case; - if (ConstantSDNode *C = - dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { - // An add of one will be selected as an INC. - if (C->getAPIntValue() == 1) { - Opcode = X86ISD::INC; - NumOperands = 1; - break; - } - // An add of negative one (subtract of one) will be selected as a DEC. - if (C->getAPIntValue().isAllOnesValue()) { - Opcode = X86ISD::DEC; - NumOperands = 1; - break; - } + if (Op.getResNo() != 0 || NeedOF || NeedCF) + // Emit a CMP with 0, which is the TEST pattern. + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, Op.getValueType())); + + unsigned Opcode = 0; + unsigned NumOperands = 0; + switch (Op.getNode()->getOpcode()) { + case ISD::ADD: + // Due to an isel shortcoming, be conservative if this add is likely to be + // selected as part of a load-modify-store instruction. When the root node + // in a match is a store, isel doesn't know how to remap non-chain non-flag + // uses of other nodes in the match, such as the ADD in this case. This + // leads to the ADD being left around and reselected, with the result being + // two adds in the output. Alas, even if none of our users are stores, that + // doesn't prove we're O.K. Ergo, if we have any parents that aren't + // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require + // climbing the DAG back to the root, and it doesn't seem to be worth the + // effort. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) + goto default_case; + + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) { + // An add of one will be selected as an INC. + if (C->getAPIntValue() == 1) { + Opcode = X86ISD::INC; + NumOperands = 1; + break; } - // Otherwise use a regular EFLAGS-setting add. - Opcode = X86ISD::ADD; - NumOperands = 2; - break; - case ISD::AND: { - // If the primary and result isn't used, don't bother using X86ISD::AND, - // because a TEST instruction will be better. - bool NonFlagUse = false; - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - unsigned UOpNo = UI.getOperandNo(); - if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { - // Look pass truncate. - UOpNo = User->use_begin().getOperandNo(); - User = *User->use_begin(); - } - if (User->getOpcode() != ISD::BRCOND && - User->getOpcode() != ISD::SETCC && - (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { - NonFlagUse = true; - break; - } + + // An add of negative one (subtract of one) will be selected as a DEC. + if (C->getAPIntValue().isAllOnesValue()) { + Opcode = X86ISD::DEC; + NumOperands = 1; + break; } - if (!NonFlagUse) + } + + // Otherwise use a regular EFLAGS-setting add. + Opcode = X86ISD::ADD; + NumOperands = 2; + break; + case ISD::AND: { + // If the primary and result isn't used, don't bother using X86ISD::AND, + // because a TEST instruction will be better. + bool NonFlagUse = false; + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + unsigned UOpNo = UI.getOperandNo(); + if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { + // Look past the truncate.
+ UOpNo = User->use_begin().getOperandNo(); + User = *User->use_begin(); + } + + if (User->getOpcode() != ISD::BRCOND && + User->getOpcode() != ISD::SETCC && + (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { + NonFlagUse = true; break; + } } + + if (!NonFlagUse) + break; + } // FALL THROUGH - case ISD::SUB: - case ISD::OR: - case ISD::XOR: - // Due to the ISEL shortcoming noted above, be conservative if this op is - // likely to be selected as part of a load-modify-store instruction. - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + case ISD::SUB: + case ISD::OR: + case ISD::XOR: + // Due to the ISEL shortcoming noted above, be conservative if this op is + // likely to be selected as part of a load-modify-store instruction. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == ISD::STORE) - goto default_case; - // Otherwise use a regular EFLAGS-setting instruction. - switch (Op.getNode()->getOpcode()) { - case ISD::SUB: Opcode = X86ISD::SUB; break; - case ISD::OR: Opcode = X86ISD::OR; break; - case ISD::XOR: Opcode = X86ISD::XOR; break; - case ISD::AND: Opcode = X86ISD::AND; break; - default: llvm_unreachable("unexpected operator!"); - } - NumOperands = 2; - break; - case X86ISD::ADD: - case X86ISD::SUB: - case X86ISD::INC: - case X86ISD::DEC: - case X86ISD::OR: - case X86ISD::XOR: - case X86ISD::AND: - return SDValue(Op.getNode(), 1); - default: - default_case: - break; - } - if (Opcode != 0) { - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - SmallVector<SDValue, 4> Ops; - for (unsigned i = 0; i != NumOperands; ++i) - Ops.push_back(Op.getOperand(i)); - SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands); - DAG.ReplaceAllUsesWith(Op, New); - return SDValue(New.getNode(), 1); + if (UI->getOpcode() == ISD::STORE) + goto default_case; + + // Otherwise use a regular EFLAGS-setting instruction. + switch (Op.getNode()->getOpcode()) { + default: llvm_unreachable("unexpected operator!"); + case ISD::SUB: Opcode = X86ISD::SUB; break; + case ISD::OR: Opcode = X86ISD::OR; break; + case ISD::XOR: Opcode = X86ISD::XOR; break; + case ISD::AND: Opcode = X86ISD::AND; break; } + + NumOperands = 2; + break; + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::INC: + case X86ISD::DEC: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + return SDValue(Op.getNode(), 1); + default: + default_case: + break; } - // Otherwise just emit a CMP with 0, which is the TEST pattern. - return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, - DAG.getConstant(0, Op.getValueType())); + if (Opcode == 0) + // Emit a CMP with 0, which is the TEST pattern. 
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, Op.getValueType())); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0; i != NumOperands; ++i) + Ops.push_back(Op.getOperand(i)); + + SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands); + DAG.ReplaceAllUsesWith(Op, New); + return SDValue(New.getNode(), 1); } /// Emit nodes that will be selected as "cmp Op0,Op1", or something @@ -6113,15 +6185,21 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, Op1 = Op1.getOperand(0); SDValue LHS, RHS; - if (Op1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0))) - if (And10C->getZExtValue() == 1) { - LHS = Op0; - RHS = Op1.getOperand(1); - } - } else if (Op0.getOpcode() == ISD::SHL) { + if (Op1.getOpcode() == ISD::SHL) + std::swap(Op0, Op1); + if (Op0.getOpcode() == ISD::SHL) { if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0))) if (And00C->getZExtValue() == 1) { + // If we looked past a truncate, check that it's only truncating away + // known zeros. + unsigned BitWidth = Op0.getValueSizeInBits(); + unsigned AndBitWidth = And.getValueSizeInBits(); + if (BitWidth > AndBitWidth) { + APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones; + DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones); + if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) + return SDValue(); + } LHS = Op1; RHS = Op0.getOperand(1); } @@ -6172,7 +6250,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant && - cast<ConstantSDNode>(Op1)->getZExtValue() == 0 && + cast<ConstantSDNode>(Op1)->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); if (NewSetCC.getNode()) @@ -6552,15 +6630,16 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, MVT::i8); - SDValue User = SDValue(*Op.getNode()->use_begin(), 0); + SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order // to implement FCMP_OEQ. - if (User.getOpcode() == ISD::BR) { - SDValue FalseBB = User.getOperand(1); - SDValue NewBR = - DAG.UpdateNodeOperands(User, User.getOperand(0), Dest); + if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); + (void)NewBR; Dest = FalseBB; Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), @@ -6632,7 +6711,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Flag; - EVT IntPtr = getPointerTy(); EVT SPTy = Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32; Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); @@ -6685,7 +6763,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(FuncInfo->getVarArgsFPOffset(), MVT::i32), - FIN, SV, 0, false, false, 0); + FIN, SV, 4, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area @@ -6693,7 +6771,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8, false, false, 0); MemOps.push_back(Store); @@ -6702,7 +6780,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16, false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -6712,9 +6790,6 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); - SDValue Chain = Op.getOperand(0); - SDValue SrcPtr = Op.getOperand(1); - SDValue SrcSV = Op.getOperand(2); report_fatal_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); @@ -7733,6 +7808,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; + case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; @@ -7944,8 +8020,11 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. 
+ nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(bInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -7955,17 +8034,17 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, newMBB->addSuccessor(newMBB); // Insert instructions into newMBB based on incoming instruction - assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 && + assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 && "unexpected number of operands"); DebugLoc dl = bInstr->getDebugLoc(); MachineOperand& destOper = bInstr->getOperand(0); - MachineOperand* argOpers[2 + X86AddrNumOperands]; + MachineOperand* argOpers[2 + X86::AddrNumOperands]; int numArgs = bInstr->getNumOperands() - 1; for (int i=0; i < numArgs; ++i) argOpers[i] = &bInstr->getOperand(i+1); // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] int valArgIndx = lastAddrIndx + 1; unsigned t1 = F->getRegInfo().createVirtualRegister(RC); @@ -8008,7 +8087,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + bInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8053,8 +8132,11 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. + nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(bInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -8066,12 +8148,12 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, DebugLoc dl = bInstr->getDebugLoc(); // Insert instructions into newMBB based on incoming instruction // There are 8 "real" operands plus 9 implicit def/uses, ignored here. - assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 && + assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 && "unexpected number of operands"); MachineOperand& dest1Oper = bInstr->getOperand(0); MachineOperand& dest2Oper = bInstr->getOperand(1); - MachineOperand* argOpers[2 + X86AddrNumOperands]; - for (int i=0; i < 2 + X86AddrNumOperands; ++i) { + MachineOperand* argOpers[2 + X86::AddrNumOperands]; + for (int i=0; i < 2 + X86::AddrNumOperands; ++i) { argOpers[i] = &bInstr->getOperand(i+2); // We use some of the operands multiple times, so conservatively just @@ -8081,7 +8163,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, } // x86 address has 5 operands: base, index, scale, displacement, and segment. 
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] unsigned t1 = F->getRegInfo().createVirtualRegister(RC); MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1); @@ -8171,7 +8253,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + bInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8205,8 +8287,11 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors of thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. + nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(mInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -8217,16 +8302,16 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, DebugLoc dl = mInstr->getDebugLoc(); // Insert instructions into newMBB based on incoming instruction - assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 && + assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 && "unexpected number of operands"); MachineOperand& destOper = mInstr->getOperand(0); - MachineOperand* argOpers[2 + X86AddrNumOperands]; + MachineOperand* argOpers[2 + X86::AddrNumOperands]; int numArgs = mInstr->getNumOperands() - 1; for (int i=0; i < numArgs; ++i) argOpers[i] = &mInstr->getOperand(i+1); // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] int valArgIndx = lastAddrIndx + 1; unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); @@ -8274,7 +8359,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. + mInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8284,7 +8369,6 @@ MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - MachineFunction *F = BB->getParent(); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -8306,7 +8390,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) .addReg(X86::XMM0); - F->DeleteMachineInstr(MI); + MI->eraseFromParent(); return BB; } @@ -8335,9 +8419,12 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( F->insert(MBBIter, XMMSaveMBB); F->insert(MBBIter, EndMBB); - // Set up the CFG. - // Move any original successors of MBB to the end block. - EndMBB->transferSuccessors(MBB); + // Transfer the remainder of MBB and its successor edges to EndMBB. + EndMBB->splice(EndMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + EndMBB->transferSuccessorsAndUpdatePHIs(MBB); + // The original block will now fall through to the XMM save block. MBB->addSuccessor(XMMSaveMBB); // The XMMSaveMBB will fall through to the end block. 
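All three custom inserters in the hunks above expand an atomic pseudo-instruction into the same shape: newMBB loads the current value, computes the update (for min/max, a compare plus CMOV), issues the LOCK'ed compare-exchange, and JNE loops back to newMBB until the exchange succeeds. As a rough portable illustration of that retry loop, using std::atomic rather than the emitted machine code:

    // The same load / compute / compare-exchange / retry structure that the
    // inserters above build out of machine basic blocks, written with
    // std::atomic. compare_exchange_weak refreshes 'seen' on failure, which
    // plays the role of the loop-carried value in newMBB.
    #include <algorithm>
    #include <atomic>
    #include <iostream>

    int atomic_min(std::atomic<int> &loc, int val) {
      int seen = loc.load();                    // t1 = load [addr]
      while (!loc.compare_exchange_weak(        // lock cmpxchg; JNE newMBB
                 seen, std::min(seen, val)))    // t2 = min(t1, val), the CMOV
        ;
      return seen;                              // old value, like the pseudo's dest
    }

    int main() {
      std::atomic<int> x(42);
      atomic_min(x, 7);
      std::cout << x.load() << '\n';            // prints 7
    }

The branch back to newMBB is exactly the compare_exchange failure path here: if another thread changed the location between the load and the LOCK CMPXCHG, the update is recomputed from the freshly observed value and retried.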
@@ -8376,7 +8463,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( .addMemOperand(MMO); } - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return EndMBB; } @@ -8405,24 +8492,39 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned Opc = - X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); - BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // If the EFLAGS register isn't dead in the terminator, then claim that it's + // live into the sink and copy blocks. + const MachineFunction *MF = BB->getParent(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + BitVector ReservedRegs = TRI->getReservedRegs(*MF); + + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue; + unsigned Reg = MO.getReg(); + if (Reg != X86::EFLAGS) continue; + copy0MBB->addLiveIn(Reg); + sinkMBB->addLiveIn(Reg); + } + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + // Create the conditional branch instruction. + unsigned Opc = + X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); + BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -8431,11 +8533,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... - BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(X86::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return sinkMBB; } @@ -8444,21 +8547,70 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - MachineFunction *F = BB->getParent(); // The lowering is pretty easy: we're just emitting the call to _alloca. The // non-trivial part is impdef of ESP. // FIXME: The code should be tweaked as soon as we'll try to do codegen for // mingw-w64. 
- BuildMI(BB, DL, TII->get(X86::CALLpcrel32)) + BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) .addExternalSymbol("_alloca") .addReg(X86::EAX, RegState::Implicit) .addReg(X86::ESP, RegState::Implicit) .addReg(X86::EAX, RegState::Define | RegState::Implicit) .addReg(X86::ESP, RegState::Define | RegState::Implicit); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, + MachineBasicBlock *BB) const { + // This is pretty easy. We're taking the value that we received from + // our load from the relocation, sticking it in either RDI (x86-64) + // or EAX and doing an indirect call. The return value will then + // be in the normal return register. + const X86InstrInfo *TII + = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo()); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *F = BB->getParent(); + + assert(MI->getOperand(3).isGlobal() && "This should be a global"); + + if (Subtarget->is64Bit()) { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV64rm), X86::RDI) + .addReg(X86::RIP) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); + addDirectMem(MIB, X86::RDI); + } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV32rm), X86::EAX) + .addReg(0) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); + } else { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV32rm), X86::EAX) + .addReg(TII->getGlobalBaseReg(F)) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); + } + + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -8469,6 +8621,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, default: assert(false && "Unexpected instr type to insert"); case X86::MINGW_ALLOCA: return EmitLoweredMingwAlloca(MI, BB); + case X86::TLSCall_32: + case X86::TLSCall_64: + return EmitLoweredTLSCall(MI, BB); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8499,23 +8654,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false); - addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... unsigned OldCW = F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass); - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW), + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); // Set the high part to be round to zero... - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx) + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx) .addImm(0xC7F); // Reload the modified control word now... 
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FLDCW16m)), CWFrameIdx); // Restore the memory image of control word to original value - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx) + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx) .addReg(OldCW); // Get the X86 opcode to use. @@ -8554,13 +8711,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } else { AM.Disp = Op.getImm(); } - addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM) - .addReg(MI->getOperand(X86AddrNumOperands).getReg()); + addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM) + .addReg(MI->getOperand(X86::AddrNumOperands).getReg()); // Reload the original control word now. - addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FLDCW16m)), CWFrameIdx); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } // String/text processing lowering. @@ -9513,8 +9671,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) { - if (SumC->getSExtValue() == Bits && - ShAmt1.getOperand(1) == ShAmt0) + SDValue ShAmt1Op1 = ShAmt1.getOperand(1); + if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE) + ShAmt1Op1 = ShAmt1Op1.getOperand(0); + if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, DAG.getNode(ISD::TRUNCATE, DL, @@ -9710,58 +9870,6 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// On X86 and X86-64, atomic operations are lowered to locked instructions. -// Locked instructions, in turn, have implicit fence semantics (all memory -// operations are flushed before issuing the locked instruction, and the -// are not buffered), so we can fold away the common pattern of -// fence-atomic-fence. 
-static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { - SDValue atomic = N->getOperand(0); - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - break; - default: - return SDValue(); - } - - SDValue fence = atomic.getOperand(0); - if (fence.getOpcode() != ISD::MEMBARRIER) - return SDValue(); - - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2), - atomic.getOperand(3)); - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2)); - default: - return SDValue(); - } -} - static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> // (and (i32 x86isd::setcc_carry), 1) @@ -9809,7 +9917,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); - case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG); case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); } @@ -9932,8 +10039,8 @@ static bool LowerToBSwap(CallInst *CI) { // so don't worry about this. // Verify this is a simple bswap. - if (CI->getNumOperands() != 2 || - CI->getType() != CI->getOperand(1)->getType() || + if (CI->getNumArgOperands() != 1 || + CI->getType() != CI->getArgOperand(0)->getType() || !CI->getType()->isIntegerTy()) return false; @@ -9946,7 +10053,7 @@ static bool LowerToBSwap(CallInst *CI) { Module *M = CI->getParent()->getParent()->getParent(); Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Op = CallInst::Create(Int, Op, CI->getName(), CI); CI->replaceAllUsesWith(Op); @@ -10079,7 +10186,6 @@ LowerXConstraint(EVT ConstraintVT) const { /// vector. If it is invalid, don't add anything to Ops. void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Constraint, - bool hasMemory, std::vector<SDValue>&Ops, SelectionDAG &DAG) const { SDValue Result(0, 0); @@ -10121,9 +10227,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'e': { // 32-bit signed value if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { - const ConstantInt *CI = C->getConstantIntValue(); - if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()), - C->getSExtValue())) { + if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), + C->getSExtValue())) { // Widen to 64 bits here to get it sign extended. 
Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
        break;
@@ -10136,9 +10241,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   case 'Z': {
     // 32-bit unsigned value
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
-      const ConstantInt *CI = C->getConstantIntValue();
-      if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
-                                  C->getZExtValue())) {
+      if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+                                           C->getZExtValue())) {
         Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
         break;
       }
@@ -10155,6 +10259,12 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
       break;
     }

+    // In any sort of PIC mode, addresses need to be computed at runtime by
+    // adding in a register or some sort of table lookup. These can't
+    // be used as immediates.
+    if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+      return;
+
     // If we are in non-pic codegen mode, we allow the address of a global (with
     // an optional displacement) to be used with 'i'.
     GlobalAddressSDNode *GA = 0;
@@ -10190,11 +10300,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                 getTargetMachine())))
       return;

-    if (hasMemory)
-      Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
-    else
-      Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
-    Result = Op;
+    Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+                                        GA->getValueType(0), Offset);
     break;
   }
   }
@@ -10203,8 +10310,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
     Ops.push_back(Result);
     return;
   }
-  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
-                                                      Ops, DAG);
+  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
 }

 std::vector<unsigned> X86TargetLowering::
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 1ef1a7b018a9..2d28e5cc2ea7 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -196,6 +196,10 @@ namespace llvm {
       // TLSADDR - Thread Local Storage.
       TLSADDR,
+
+      // TLSCALL - Thread Local Storage. A call to an OS-provided
+      // thunk at the address from an earlier relocation.
+      TLSCALL,

       // SegmentBaseAddress - The address segment:0
       SegmentBaseAddress,
@@ -496,7 +500,6 @@ namespace llvm {
     /// being processed is 'm'.
     virtual void LowerAsmOperandForConstraint(SDValue Op,
                                               char ConstraintLetter,
-                                              bool hasMemory,
                                               std::vector<SDValue> &Ops,
                                               SelectionDAG &DAG) const;
@@ -576,20 +579,17 @@ namespace llvm {
     /// createFastISel - This method returns a target specific FastISel object,
     /// or null if the target does not support "fast" ISel.
-    virtual FastISel *
-    createFastISel(MachineFunction &mf,
-                   DenseMap<const Value *, unsigned> &,
-                   DenseMap<const BasicBlock *, MachineBasicBlock *> &,
-                   DenseMap<const AllocaInst *, int> &,
-                   std::vector<std::pair<MachineInstr*, unsigned> > &
-#ifndef NDEBUG
-                   , SmallSet<const Instruction *, 8> &
-#endif
-                   ) const;
+    virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;

     /// getFunctionAlignment - Return the Log2 alignment of this function.
     virtual unsigned getFunctionAlignment(const Function *F) const;

+    /// getStackCookieLocation - Return true if the target stores stack
+    /// protector cookies at a fixed offset in some non-standard address
+    /// space, and populates the address space and offset as
+    /// appropriate.
+ virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const; + private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -643,6 +643,7 @@ namespace llvm { bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const; bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; @@ -725,6 +726,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; @@ -733,13 +735,13 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl<EVT> &OutTys, - const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, - SelectionDAG &DAG) const; + const SmallVectorImpl<ISD::OutputArg> &Outs, + LLVMContext &Context) const; void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, unsigned NewOp) const; @@ -794,6 +796,9 @@ namespace llvm { MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, + MachineBasicBlock *BB) const; /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. @@ -806,15 +811,7 @@ namespace llvm { }; namespace X86 { - FastISel *createFastISel(MachineFunction &mf, - DenseMap<const Value *, unsigned> &, - DenseMap<const BasicBlock *, MachineBasicBlock *> &, - DenseMap<const AllocaInst *, int> &, - std::vector<std::pair<MachineInstr*, unsigned> > & -#ifndef NDEBUG - , SmallSet<const Instruction*, 8> & -#endif - ); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo); } } diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 97eb17c689d0..42d0e7f9778a 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -35,6 +35,14 @@ def i64i8imm : Operand<i64> { let ParserMatchClass = ImmSExti64i8AsmOperand; } +def lea64_32mem : Operand<i32> { + let PrintMethod = "printi32mem"; + let AsmOperandLowerMethod = "lower_lea64_32mem"; + let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + + // Special i64mem for addresses of load folding tail calls. These are not // allowed to use callee-saved registers since they must be scheduled // after callee-saved register are popped. 
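Context for the operand-count changes above: lea64_32mem now lists a fifth
sub-operand (the trailing i8imm) in its MIOperandInfo, and the ComplexPatterns
below go from 4 to 5 operands, because every x86 memory reference in this
patch carries an explicit segment register. A minimal self-contained C++
sketch of that layout (field names are illustrative, not LLVM API):

    #include <cstdint>

    // The five sub-operands of an x86 memory reference after this change:
    // base, scale, index, displacement, and the new segment register.
    struct X86MemOperand {
      unsigned BaseReg;  // base register (0 = none)
      uint8_t  Scale;    // 1, 2, 4, or 8
      unsigned IndexReg; // index register (0 = none)
      int32_t  Disp;     // signed displacement
      unsigned SegReg;   // segment register (0 = default segment)
    };

    // A plain [Reg] reference, mirroring what addDirectMem() in
    // X86InstrBuilder.h now emits: Reg, 1, NoReg, 0, NoReg.
    inline X86MemOperand directMem(unsigned Reg) {
      return X86MemOperand{Reg, 1, 0, 0, 0};
    }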
@@ -44,29 +52,16 @@ def i64mem_TC : Operand<i64> { let ParserMatchClass = X86MemAsmOperand; } -def lea64mem : Operand<i64> { - let PrintMethod = "printlea64mem"; - let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm); - let ParserMatchClass = X86NoSegMemAsmOperand; -} - -def lea64_32mem : Operand<i32> { - let PrintMethod = "printlea64_32mem"; - let AsmOperandLowerMethod = "lower_lea64_32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); - let ParserMatchClass = X86NoSegMemAsmOperand; -} - //===----------------------------------------------------------------------===// // Complex Pattern Definitions. // -def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr", +def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr", [add, sub, mul, X86mul_imm, shl, or, frameindex, X86WrapperRIP], []>; -def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr", +def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; - + //===----------------------------------------------------------------------===// // Pattern fragments. // @@ -289,11 +284,11 @@ def LEA64_32r : I<0x8D, MRMSrcMem, [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)]>; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", [(set GR64:$dst, (bswap GR64:$src))]>, TB; @@ -521,7 +516,7 @@ let Defs = [EFLAGS] in { def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src), "add{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isConvertibleToThreeAddress = 1 in { let isCommutable = 1 in // Register-Register Addition @@ -559,7 +554,7 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86add_flag GR64:$src1, (load addr:$src2)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Memory-Register Addition def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), @@ -580,7 +575,7 @@ let Uses = [EFLAGS] in { def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src), "adc{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -606,7 +601,7 @@ def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", @@ -621,7 +616,7 @@ def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), addr:$dst)]>; } // Uses = [EFLAGS] -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { // Register-Register Subtraction def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -653,7 +648,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), "sub{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src), 
"sub{q}\t{$src, %rax|%rax, $src}", []>; @@ -677,7 +672,7 @@ def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), (implicit EFLAGS)]>; let Uses = [EFLAGS] in { -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", @@ -702,7 +697,7 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src), "sbb{q}\t{$src, %rax|%rax, $src}", []>; @@ -736,7 +731,7 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), } let Defs = [EFLAGS] in { -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in // Register-Register Signed Integer Multiplication def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), @@ -751,7 +746,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Suprisingly enough, these are not two address instructions! @@ -803,7 +798,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src), // Unary instructions let Defs = [EFLAGS], CodeSize = 2 in { -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst", [(set GR64:$dst, (ineg GR64:$src)), (implicit EFLAGS)]>; @@ -811,14 +806,14 @@ def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst", [(store (ineg (loadi64 addr:$dst)), addr:$dst), (implicit EFLAGS)]>; -let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in +let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>; def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst", [(store (add (loadi64 addr:$dst), 1), addr:$dst), (implicit EFLAGS)]>; -let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in +let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>; def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", @@ -826,7 +821,7 @@ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", (implicit EFLAGS)]>; // In 64-bit mode, single byte INC and DEC cannot be encoded. -let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in { +let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in { // Can transform into LEA. def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src), "inc{w}\t$dst", @@ -844,38 +839,36 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>, Requires<[In64BitMode]>; -} // isConvertibleToThreeAddress +} // Constraints = "$src = $dst", isConvertibleToThreeAddress // These are duplicates of their 32-bit counterparts. Only needed so X86 knows // how to unfold them. 
-let isTwoAddress = 0, CodeSize = 2 in { - def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", - [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; - def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", - [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; - def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", - [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; - def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", - [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; -} +def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", + [(store (add (loadi16 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", + [(store (add (loadi32 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; } // Defs = [EFLAGS], CodeSize let Defs = [EFLAGS] in { // Shift instructions -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in -def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src), +def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (shl GR64:$src, CL))]>; + [(set GR64:$dst, (shl GR64:$src1, CL))]>; let isConvertibleToThreeAddress = 1 in // Can transform into LEA. def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), @@ -885,7 +878,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), // 'add reg,reg' is cheaper. 
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
                 "shl{q}\t$dst", []>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"

 let Uses = [CL] in
 def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
@@ -898,18 +891,18 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
                  "shl{q}\t$dst",
                 [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;

-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let Uses = [CL] in
-def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src),
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
                   "shr{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (srl GR64:$src, CL))]>;
+                  [(set GR64:$dst, (srl GR64:$src1, CL))]>;
 def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst),
                    (ins GR64:$src1, i8imm:$src2),
                    "shr{q}\t{$src2, $dst|$dst, $src2}",
                    [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
 def SHR64r1  : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t$dst",
                  [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"

 let Uses = [CL] in
 def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
@@ -922,11 +915,11 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
                  "shr{q}\t$dst",
                 [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;

-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let Uses = [CL] in
-def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src),
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
                  "sar{q}\t{%cl, $dst|$dst, %CL}",
-                 [(set GR64:$dst, (sra GR64:$src, CL))]>;
+                 [(set GR64:$dst, (sra GR64:$src1, CL))]>;
 def SAR64ri  : RIi8<0xC1, MRM7r, (outs GR64:$dst),
                     (ins GR64:$src1, i8imm:$src2),
                     "sar{q}\t{$src2, $dst|$dst, $src2}",
@@ -934,7 +927,7 @@ def SAR64ri  : RIi8<0xC1, MRM7r, (outs GR64:$dst),
 def SAR64r1  : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t$dst",
                  [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"

 let Uses = [CL] in
 def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
@@ -949,7 +942,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),

 // Rotate instructions

-let isTwoAddress = 1 in {
+let Constraints = "$src = $dst" in {
 def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
                  "rcl{q}\t{1, $dst|$dst, 1}", []>;
 def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
@@ -966,9 +959,8 @@ def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
 def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
                   "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
 }
-}
+} // Constraints = "$src = $dst"

-let isTwoAddress = 0 in {
 def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
                  "rcl{q}\t{1, $dst|$dst, 1}", []>;
 def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
@@ -984,13 +976,12 @@ def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
 def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
                   "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
 }
-}

-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let Uses = [CL] in
-def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
                   "rol{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (rotl GR64:$src, CL))]>;
+                  [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
 def ROL64ri  : RIi8<0xC1, MRM0r, (outs GR64:$dst),
                     (ins GR64:$src1, i8imm:$src2),
                     "rol{q}\t{$src2, $dst|$dst, $src2}",
@@ -998,7 +989,7 @@ def ROL64ri  : RIi8<0xC1, MRM0r, (outs GR64:$dst),
 def
ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t$dst", [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), @@ -1011,11 +1002,11 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t$dst", [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in -def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src), +def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotr GR64:$src, CL))]>; + [(set GR64:$dst, (rotr GR64:$src1, CL))]>; def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "ror{q}\t{$src2, $dst|$dst, $src2}", @@ -1023,7 +1014,7 @@ def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t$dst", [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), @@ -1037,7 +1028,7 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; // Double shift instructions (generalizations of rotate) -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1067,7 +1058,7 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (i8 imm:$src3)))]>, TB; } // isCommutable -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in { def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), @@ -1097,7 +1088,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, // Logical Instructions... 
// -let isTwoAddress = 1 , AddedComplexity = 15 in +let Constraints = "$src = $dst" , AddedComplexity = 15 in def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst", [(set GR64:$dst, (not GR64:$src))]>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", @@ -1107,7 +1098,7 @@ let Defs = [EFLAGS] in { def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src), "and{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def AND64rr : RI<0x21, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1134,7 +1125,7 @@ def AND64ri32 : RIi32<0x81, MRM4r, "and{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86and_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def AND64mr : RI<0x21, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), @@ -1152,7 +1143,7 @@ def AND64mi32 : RIi32<0x81, MRM4m, [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1179,7 +1170,7 @@ def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86or_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "or{q}\t{$src, $dst|$dst, $src}", @@ -1197,7 +1188,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src), "or{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1224,7 +1215,7 @@ def XOR64ri32 : RIi32<0x81, MRM6r, "xor{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "xor{q}\t{$src, $dst|$dst, $src}", @@ -1366,7 +1357,7 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), } // Defs = [EFLAGS] // Conditional moves -let Uses = [EFLAGS], isTwoAddress = 1 in { +let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { let isCommutable = 1 in { def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64 (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1530,7 +1521,7 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64] "cmovno{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2), X86_COND_NO, EFLAGS))]>, TB; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Use sbb to materialize carry flag into a GPR. // FIXME: This are pseudo ops that should be replaced with Pat<> patterns. 
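The "materialize carry flag" trick above is easiest to see from the identity
sbb computes when both operands are the same register. A self-contained
sketch (illustrative helper, not LLVM code):

    #include <cassert>
    #include <cstdint>

    // "sbb r, r" computes r - r - CF = -CF: 0 when the carry flag is
    // clear, 0xFFFFFFFF (-1) when it is set. One instruction therefore
    // turns the carry flag into an all-zeros/all-ones GPR value.
    inline uint32_t sbbSameReg(uint32_t r, bool cf) {
      return r - r - (cf ? 1u : 0u);
    }

    int main() {
      assert(sbbSameReg(123u, false) == 0u);
      assert(sbbSameReg(123u, true) == 0xFFFFFFFFu);
    }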
@@ -1588,7 +1579,7 @@ def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src), "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}", @@ -1601,7 +1592,7 @@ def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, [(set VR128:$dst, (int_x86_sse2_cvtsi642sd VR128:$src1, (loadi64 addr:$src2)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Signed i64 -> f32 def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src), @@ -1611,7 +1602,7 @@ def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src), "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2), "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}", @@ -1625,7 +1616,7 @@ let isTwoAddress = 1 in { [(set VR128:$dst, (int_x86_sse_cvtsi642ss VR128:$src1, (loadi64 addr:$src2)))]>; -} +} // Constraints = "$src1 = $dst" // f32 -> signed i64 def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src), @@ -1691,6 +1682,7 @@ def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src), // Thread Local Storage Instructions //===----------------------------------------------------------------------===// +// ELF TLS Support // All calls clobber the non-callee saved registers. RSP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. @@ -1700,7 +1692,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [RSP] in -def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym), +def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym), ".byte\t0x66; " "leaq\t$sym(%rip), %rdi; " ".word\t0x6666; " @@ -1709,6 +1701,17 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym), [(X86tlsaddr tls64addr:$sym)]>, Requires<[In64BitMode]>; +// Darwin TLS Support +// For x86_64, the address of the thunk is passed in %rdi, on return +// the address of the variable is in %rax. All other registers are preserved. +let Defs = [RAX], + Uses = [RDI], + usesCustomInserter = 1 in +def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym), + "# TLSCall_64", + [(X86TLSCall addr:$sym)]>, + Requires<[In64BitMode]>; + let AddedComplexity = 5, isCodeGenOnly = 1 in def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "movq\t%gs:$src, $dst", @@ -1964,6 +1967,17 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), (TCRETURNdi64 texternalsym:$dst, imm:$off)>, Requires<[In64BitMode]>; +// tls has some funny stuff here... +// This corresponds to movabs $foo@tpoff, %rax +def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)), + (MOV64ri tglobaltlsaddr :$dst)>; +// This corresponds to add $foo@tpoff, %rax +def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)), + (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>; +// This corresponds to mov foo@tpoff(%rbx), %eax +def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))), + (MOV64rm tglobaltlsaddr :$dst)>; + // Comparisons. 
// TEST R,R is smaller than CMP R,0 @@ -2332,45 +2346,3 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; -//===----------------------------------------------------------------------===// -// X86-64 SSE4.1 Instructions -//===----------------------------------------------------------------------===// - -/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination -multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst), - (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set GR64:$dst, - (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W; - def mr : SS4AIi8<opc, MRMDestMem, (outs), - (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(store (extractelt (v2i64 VR128:$src1), imm:$src2), - addr:$dst)]>, OpSize, REX_W; -} - -defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; - -let isTwoAddress = 1 in { - multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, - OpSize, REX_W; - def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), - imm:$src3)))]>, OpSize, REX_W; - } -} - -defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 5a82a7b1979b..2a6a71dd91a3 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -64,19 +64,15 @@ struct X86AddressMode { /// static inline const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) { - // Because memory references are always represented with four - // values, this adds: Reg, [1, NoReg, 0] to the instruction. - return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0); + // Because memory references are always represented with five + // values, this adds: Reg, 1, NoReg, 0, NoReg to the instruction. + return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0); } -static inline const MachineInstrBuilder & -addLeaOffset(const MachineInstrBuilder &MIB, int Offset) { - return MIB.addImm(1).addReg(0).addImm(Offset); -} static inline const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset) { - return addLeaOffset(MIB, Offset).addReg(0); + return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0); } /// addRegOffset - This function is used to add a memory reference of the form @@ -89,25 +85,20 @@ addRegOffset(const MachineInstrBuilder &MIB, return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); } -static inline const MachineInstrBuilder & -addLeaRegOffset(const MachineInstrBuilder &MIB, - unsigned Reg, bool isKill, int Offset) { - return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); -} - /// addRegReg - This function is used to add a memory reference of the form: /// [Reg + Reg]. 
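/// For example, addRegReg(MIB, X86::RBX, false, X86::RCX, false) appends the
/// five sub-operands { %rbx, 1, %rcx, 0, %noreg } for [RBX + RCX]; the
/// trailing zero segment register is the new fifth sub-operand.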
static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) { return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1) - .addReg(Reg2, getKillRegState(isKill2)).addImm(0); + .addReg(Reg2, getKillRegState(isKill2)).addImm(0).addReg(0); } static inline const MachineInstrBuilder & -addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { - assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); - +addFullAddress(const MachineInstrBuilder &MIB, + const X86AddressMode &AM) { + assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); + if (AM.BaseType == X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); else if (AM.BaseType == X86AddressMode::FrameIndexBase) @@ -116,15 +107,11 @@ addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { assert (0); MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) - return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); + MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); else - return MIB.addImm(AM.Disp); -} - -static inline const MachineInstrBuilder & -addFullAddress(const MachineInstrBuilder &MIB, - const X86AddressMode &AM) { - return addLeaAddress(MIB, AM).addReg(0); + MIB.addImm(AM.Disp); + + return MIB.addReg(0); } /// addFrameReference - This function is used to add a reference to the base of diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 0aae4a8653b0..da93de988d50 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -371,7 +371,7 @@ multiclass FPCMov<PatLeaf cc> { Requires<[HasCMov]>; } -let Uses = [EFLAGS], isTwoAddress = 1 in { +let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { defm CMOVB : FPCMov<X86_COND_B>; defm CMOVBE : FPCMov<X86_COND_BE>; defm CMOVE : FPCMov<X86_COND_E>; @@ -380,7 +380,7 @@ defm CMOVNB : FPCMov<X86_COND_AE>; defm CMOVNBE: FPCMov<X86_COND_A>; defm CMOVNE : FPCMov<X86_COND_NE>; defm CMOVNP : FPCMov<X86_COND_NP>; -} +} // Uses = [EFLAGS], Constraints = "$src1 = $dst" let Predicates = [HasCMov] in { // These are not factored because there's no clean way to pass DA/DB. @@ -680,19 +680,19 @@ def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>; // FP extensions map onto simple pseudo-value conversions if they are to/from // the FP stack. -def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, +def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, +def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, +def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>, Requires<[FPStackf64]>; // FP truncations map onto simple pseudo-value conversions if they are to/from // the FP stack. We have validated that only value-preserving truncations make // it through isel. 
-def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>,
+def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
           Requires<[FPStackf32]>;
-def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>,
+def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
           Requires<[FPStackf32]>;
-def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>,
+def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
           Requires<[FPStackf64]>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index c4522f3fd9e8..97578af499be 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -50,9 +50,10 @@ def NoImm      : ImmType<0>;
 def Imm8       : ImmType<1>;
 def Imm8PCRel  : ImmType<2>;
 def Imm16      : ImmType<3>;
-def Imm32      : ImmType<4>;
-def Imm32PCRel : ImmType<5>;
-def Imm64      : ImmType<6>;
+def Imm16PCRel : ImmType<4>;
+def Imm32      : ImmType<5>;
+def Imm32PCRel : ImmType<6>;
+def Imm64      : ImmType<7>;

 // FPFormat - This specifies what form this FP instruction has. This is used by
 // the Floating-Point stackifier pass.
@@ -101,6 +102,10 @@ class XS     { bits<4> Prefix = 12; }
 class T8     { bits<4> Prefix = 13; }
 class TA     { bits<4> Prefix = 14; }
 class TF     { bits<4> Prefix = 15; }
+class VEX    { bit hasVEXPrefix = 1; }
+class VEX_W  { bit hasVEX_WPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }

 class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
               string AsmStr, Domain d = GenericDomain>
@@ -128,6 +133,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
   bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
   bits<2> SegOvrBits = 0;   // Segment override prefix.
   Domain ExeDomain = d;
+  bit hasVEXPrefix = 0;     // Does this inst require a VEX prefix?
+  bit hasVEX_WPrefix = 0;   // Does this inst set the VEX_W field?
+  bit hasVEX_4VPrefix = 0;  // Does this inst require the VEX.VVVV field?
+  bit hasVEX_i8ImmReg = 0;  // Does this inst require the last source register
+                            // to be encoded in an immediate field?

   // TSFlags layout should be kept in sync with X86InstrInfo.h.
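  // Bits 32-35 below are new, so TSFlags must now be read as a 64-bit value;
  // a C++ consumer would test them roughly like this (illustrative, matching
  // the bit positions assigned below):
  //   bool HasVEX    = (TSFlags >> 32) & 1;  // hasVEXPrefix
  //   bool HasVEX_4V = (TSFlags >> 34) & 1;  // hasVEX_4VPrefix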
let TSFlags{5-0} = FormBits; @@ -141,6 +151,10 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{21-20} = SegOvrBits; let TSFlags{23-22} = ExeDomain.Value; let TSFlags{31-24} = Opcode; + let TSFlags{32} = hasVEXPrefix; + let TSFlags{33} = hasVEX_WPrefix; + let TSFlags{34} = hasVEX_4VPrefix; + let TSFlags{35} = hasVEX_i8ImmReg; } class I<bits<8> o, Format f, dag outs, dag ins, string asm, @@ -174,6 +188,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, + list<dag> pattern> + : X86Inst<o, f, Imm16PCRel, outs, ins, asm> { + let Pattern = pattern; + let CodeSize = 3; +} + class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern> : X86Inst<o, f, Imm32PCRel, outs, ins, asm> { @@ -211,11 +232,56 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +// SI - SSE 1 & 2 scalar instructions +class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> + : I<o, F, outs, ins, asm, pattern> { + let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + +// SIi8 - SSE 1 & 2 scalar instructions +class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : Ii8<o, F, outs, ins, asm, pattern> { + let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + +// PI - SSE 1 & 2 packed instructions +class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern, + Domain d> + : I<o, F, outs, ins, asm, pattern, d> { + let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + +// PIi8 - SSE 1 & 2 packed instructions with immediate +class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern, Domain d> + : Ii8<o, F, outs, ins, asm, pattern, d> { + let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX], + !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm); +} + // SSE1 Instruction Templates: // // SSI - SSE1 instructions with XS prefix. // PSI - SSE1 instructions with TB prefix. // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix. +// VSSI - SSE1 instructions with XS prefix in AVX form. +// VPSI - SSE1 instructions with TB prefix in AVX form. 
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>; @@ -229,6 +295,14 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB, Requires<[HasSSE1]>; +class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, + Requires<[HasAVX]>; +class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, + Requires<[HasAVX]>; // SSE2 Instruction Templates: // @@ -237,6 +311,8 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm, // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. // PDI - SSE2 instructions with TB and OpSize prefixes. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. +// VSDI - SSE2 instructions with XD prefix in AVX form. +// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form. class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>; @@ -253,6 +329,14 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern> : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize, Requires<[HasSSE2]>; +class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, + Requires<[HasAVX]>; +class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm, + list<dag> pattern> + : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, + OpSize, Requires<[HasAVX]>; // SSE3 Instruction Templates: // diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 6b9478dfa002..71c4e8bc147f 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -60,3 +60,339 @@ def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); }], MMX_SHUFFLE_get_shuf_imm>; + +//===----------------------------------------------------------------------===// +// SSE specific DAG Nodes. 
+//===----------------------------------------------------------------------===// + +def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, + SDTCisFP<0>, SDTCisInt<2> ]>; +def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, + SDTCisFP<1>, SDTCisVT<3, i8>]>; + +def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; +def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; +def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; +def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; +def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; +def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; +def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; +def X86pshufb : SDNode<"X86ISD::PSHUFB", + SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86pextrb : SDNode<"X86ISD::PEXTRB", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; +def X86pextrw : SDNode<"X86ISD::PEXTRW", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; +def X86pinsrb : SDNode<"X86ISD::PINSRB", + SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; +def X86pinsrw : SDNode<"X86ISD::PINSRW", + SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; +def X86insrtps : SDNode<"X86ISD::INSERTPS", + SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, + SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; +def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", + SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; +def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, + [SDNPHasChain, SDNPMayLoad]>; +def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>; +def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>; +def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>; +def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>; +def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>; +def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; +def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; +def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; + +def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, v4f32>, + SDTCisVT<2, v4f32>]>; +def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; + +//===----------------------------------------------------------------------===// +// SSE Complex Patterns +//===----------------------------------------------------------------------===// + +// These are 'extloads' from a scalar to the low element of a vector, zeroing +// the top elements. These are used for the SSE 'ss' and 'sd' instruction +// forms. 
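+// Roughly, e.g., sse_load_f32 lets isel treat (v4f32 (scalar_to_vector
+// (loadf32 addr))) as the memory operand of a scalar "ss" instruction; its
+// five operands are the usual base, scale, index, displacement, and segment
+// sub-operands of the address.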
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [], + [SDNPHasChain, SDNPMayLoad]>; +def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [], + [SDNPHasChain, SDNPMayLoad]>; + +def ssmem : Operand<v4f32> { + let PrintMethod = "printf32mem"; + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} +def sdmem : Operand<v2f64> { + let PrintMethod = "printf64mem"; + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + +//===----------------------------------------------------------------------===// +// SSE pattern fragments +//===----------------------------------------------------------------------===// + +def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; +def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; +def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; +def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; + +// FIXME: move this to a more appropriate place after all AVX is done. +def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; +def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; +def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; +def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; + +// Like 'store', but always requires vector alignment. +def alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast<StoreSDNode>(N)->getAlignment() >= 16; +}]>; + +// Like 'load', but always requires vector alignment. +def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 16; +}]>; + +def alignedloadfsf32 : PatFrag<(ops node:$ptr), + (f32 (alignedload node:$ptr))>; +def alignedloadfsf64 : PatFrag<(ops node:$ptr), + (f64 (alignedload node:$ptr))>; +def alignedloadv4f32 : PatFrag<(ops node:$ptr), + (v4f32 (alignedload node:$ptr))>; +def alignedloadv2f64 : PatFrag<(ops node:$ptr), + (v2f64 (alignedload node:$ptr))>; +def alignedloadv4i32 : PatFrag<(ops node:$ptr), + (v4i32 (alignedload node:$ptr))>; +def alignedloadv2i64 : PatFrag<(ops node:$ptr), + (v2i64 (alignedload node:$ptr))>; + +// FIXME: move this to a more appropriate place after all AVX is done. +def alignedloadv8f32 : PatFrag<(ops node:$ptr), + (v8f32 (alignedload node:$ptr))>; +def alignedloadv4f64 : PatFrag<(ops node:$ptr), + (v4f64 (alignedload node:$ptr))>; +def alignedloadv8i32 : PatFrag<(ops node:$ptr), + (v8i32 (alignedload node:$ptr))>; +def alignedloadv4i64 : PatFrag<(ops node:$ptr), + (v4i64 (alignedload node:$ptr))>; + +// Like 'load', but uses special alignment checks suitable for use in +// memory operands in most SSE instructions, which are required to +// be naturally aligned on some targets but not on others. If the subtarget +// allows unaligned accesses, match any load, though this may require +// setting a feature bit in the processor (on startup, for example). +// Opteron 10h and later implement such a feature. 
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return Subtarget->hasVectorUAMem() + || cast<LoadSDNode>(N)->getAlignment() >= 16; +}]>; + +def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; +def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; +def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; +def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; +def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; +def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; +def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; + +// FIXME: move this to a more appropriate place after all AVX is done. +def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; +def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; + +// SSSE3 uses MMX registers for some instructions. They aren't aligned on a +// 16-byte boundary. +// FIXME: 8 byte alignment for mmx reads is not required +def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast<LoadSDNode>(N)->getAlignment() >= 8; +}]>; + +def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>; +def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; +def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; +def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; + +// MOVNT Support +// Like 'store', but requires the non-temporal bit to be set +def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal(); + return false; +}]>; + +def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && !ST->isTruncatingStore() && + ST->getAddressingMode() == ISD::UNINDEXED && + ST->getAlignment() >= 16; + return false; +}]>; + +def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) + return ST->isNonTemporal() && + ST->getAlignment() < 16; + return false; +}]>; + +def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; +def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; +def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; +def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>; +def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; +def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; + +def vzmovl_v2i64 : PatFrag<(ops node:$src), + (bitconvert (v2i64 (X86vzmovl + (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; +def vzmovl_v4i32 : PatFrag<(ops node:$src), + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 node:$src))))))>; + +def vzload_v2i64 : PatFrag<(ops node:$src), + (bitconvert (v2i64 (X86vzload node:$src)))>; + + +def fp32imm0 : PatLeaf<(f32 fpimm), [{ + return N->isExactlyValue(+0.0); +}]>; + +// BYTE_imm - Transform bit immediates into byte immediates. +def BYTE_imm : SDNodeXForm<imm, [{ + // Transformation function: imm >> 3 + return getI32Imm(N->getZExtValue() >> 3); +}]>; + +// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*, +// SHUFP* etc. imm. 
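+// Worked example: the v4i32 reverse shuffle mask <3,2,1,0> packs two bits
+// per destination element (element 0 in bits 1:0), giving the PSHUFD
+// immediate 0b00011011 = 0x1B.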
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShuffleSHUFImmediate(N)); +}]>; + +// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to +// PSHUFHW imm. +def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); +}]>; + +// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to +// PSHUFLW imm. +def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); +}]>; + +// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to +// a PALIGNR imm. +def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ + return getI8Imm(X86::getShufflePALIGNRImmediate(N)); +}]>; + +def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + return SVOp->isSplat() && SVOp->getSplatIndex() == 0; +}]>; + +def movddup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movhlps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movlhps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movlp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movshdup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def movsldup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); +}]>; + +def pshufd : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_shuf_imm>; + +def shufp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_shuf_imm>; + +def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_pshufhw_imm>; + +def pshuflw : PatFrag<(ops node:$lhs, 
node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_pshuflw_imm>; + +def palign : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N)); +}], SHUFFLE_get_palign_imm>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 34e12cade236..ce471eadd78b 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -784,7 +784,9 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOV8rm: case X86::MOV16rm: case X86::MOV32rm: + case X86::MOV32rm_TC: case X86::MOV64rm: + case X86::MOV64rm_TC: case X86::LD_Fp64m: case X86::MOVSSrm: case X86::MOVSDrm: @@ -805,7 +807,9 @@ static bool isFrameStoreOpcode(int Opcode) { case X86::MOV8mr: case X86::MOV16mr: case X86::MOV32mr: + case X86::MOV32mr_TC: case X86::MOV64mr: + case X86::MOV64mr_TC: case X86::ST_FpP64m: case X86::MOVSSmr: case X86::MOVSDmr: @@ -863,7 +867,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameStoreOpcode(MI->getOpcode())) if (isFrameOperand(MI, 0, FrameIndex)) - return MI->getOperand(X86AddrNumOperands).getReg(); + return MI->getOperand(X86::AddrNumOperands).getReg(); return 0; } @@ -1064,14 +1068,9 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo &TRI) const { DebugLoc DL = Orig->getDebugLoc(); - if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { - DestReg = TRI->getSubReg(DestReg, SubIdx); - SubIdx = 0; - } - // MOV32r0 etc. are implemented with xor which clobbers condition code. // Re-materialize them as movri instructions to avoid side effects. bool Clone = true; @@ -1098,14 +1097,13 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, if (Clone) { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); MBB.insert(I, MI); } else { - BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); + BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0); } MachineInstr *NewMI = prior(I); - NewMI->getOperand(0).setSubReg(SubIdx); + NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); } /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that @@ -1151,10 +1149,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, // least on modern x86 machines). 
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); MachineInstr *InsMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) - .addReg(leaInReg) - .addReg(Src, getKillRegState(isKill)) - .addImm(X86::sub_16bit); + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) + .addReg(leaInReg, RegState::Define, X86::sub_16bit) + .addReg(Src, getKillRegState(isKill)); MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); @@ -1165,20 +1162,20 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, case X86::SHL16ri: { unsigned ShAmt = MI->getOperand(2).getImm(); MIB.addReg(0).addImm(1 << ShAmt) - .addReg(leaInReg, RegState::Kill).addImm(0); + .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0); break; } case X86::INC16r: case X86::INC64_16r: - addLeaRegOffset(MIB, leaInReg, true, 1); + addRegOffset(MIB, leaInReg, true, 1); break; case X86::DEC16r: case X86::DEC64_16r: - addLeaRegOffset(MIB, leaInReg, true, -1); + addRegOffset(MIB, leaInReg, true, -1); break; case X86::ADD16ri: case X86::ADD16ri8: - addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); + addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); break; case X86::ADD16rr: { unsigned Src2 = MI->getOperand(2).getReg(); @@ -1195,10 +1192,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, // well be shifting and then extracting the lower 16-bits. BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); InsMI2 = - BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2) - .addReg(leaInReg2) - .addReg(Src2, getKillRegState(isKill2)) - .addImm(X86::sub_16bit); + BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) + .addReg(leaInReg2, RegState::Define, X86::sub_16bit) + .addReg(Src2, getKillRegState(isKill2)); addRegReg(MIB, leaInReg, true, leaInReg2, true); } if (LV && isKill2 && InsMI2) @@ -1209,10 +1205,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstr *NewMI = MIB; MachineInstr *ExtMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(leaOutReg, RegState::Kill) - .addImm(X86::sub_16bit); + .addReg(leaOutReg, RegState::Kill, X86::sub_16bit); if (LV) { // Update live variables @@ -1283,7 +1278,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) .addReg(Src, getKillRegState(isKill)) - .addImm(0); + .addImm(0).addReg(0); break; } case X86::SHL32ri: { @@ -1297,7 +1292,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)).addImm(0); + .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0); break; } case X86::SHL16ri: { @@ -1313,7 +1308,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) .addReg(Src, getKillRegState(isKill)) - .addImm(0); + .addImm(0).addReg(0); break; } default: { @@ -1331,7 +1326,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); unsigned Opc = MIOpc == X86::INC64r ? 
X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, 1); @@ -1353,7 +1348,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, -1); @@ -1401,7 +1396,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri32: case X86::ADD64ri8: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); @@ -1410,7 +1405,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD32ri8: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r; - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); @@ -1421,7 +1416,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); @@ -1845,9 +1840,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { - // FIXME this should probably have a DebugLoc operand - DebugLoc dl; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -1856,7 +1850,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1866,27 +1860,27 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. 
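A detail threaded through the LEA hunks above: x86 memory references are now uniformly five operands, [BaseReg, ScaleAmt, IndexReg, Disp, Segment], which is why the rewritten builders gain a trailing .addReg(0) for the empty segment. Schematically, a hand-built LEA supplies all five parts (sketch only; Base, Scale, Index, and Disp stand in for whatever the caller computed):

    BuildMI(MBB, I, DL, TII->get(X86::LEA32r), DestReg)
        .addReg(Base)       // base register
        .addImm(Scale)      // scale amount (1, 2, 4, or 8)
        .addReg(Index)      // index register, 0 if none
        .addImm(Disp)       // displacement
        .addReg(0);         // segment register, 0 = none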
- BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { unsigned Opc = GetCondBranchFromCond(CC); - BuildMI(&MBB, dl, get(Opc)).addMBB(TBB); + BuildMI(&MBB, DL, get(Opc)).addMBB(TBB); ++Count; } } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); + BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1897,237 +1891,153 @@ static bool isHReg(unsigned Reg) { return X86::GR8_ABCD_HRegClass.contains(Reg); } -bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - // Determine if DstRC and SrcRC have a common superclass in common. - const TargetRegisterClass *CommonRC = DestRC; - if (DestRC == SrcRC) - /* Source and destination have the same register class. */; - else if (CommonRC->hasSuperClass(SrcRC)) - CommonRC = SrcRC; - else if (!DestRC->hasSubClass(SrcRC)) { - // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, - // but we want to copy them as GR64. Similarly, for GR32_NOREX and - // GR32_NOSP, copy as GR32. - if (SrcRC->hasSuperClass(&X86::GR64RegClass) && - DestRC->hasSuperClass(&X86::GR64RegClass)) - CommonRC = &X86::GR64RegClass; - else if (SrcRC->hasSuperClass(&X86::GR32RegClass) && - DestRC->hasSuperClass(&X86::GR32RegClass)) - CommonRC = &X86::GR32RegClass; +void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + // First deal with the normal symmetric copies. + unsigned Opc = 0; + if (X86::GR64RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOV64rr; + else if (X86::GR32RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOV32rr; + else if (X86::GR16RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOV16rr; + else if (X86::GR8RegClass.contains(DestReg, SrcReg)) { + // Copying to or from a physical H register on x86-64 requires a NOREX + // move. Otherwise use a normal move. + if ((isHReg(DestReg) || isHReg(SrcReg)) && + TM.getSubtarget<X86Subtarget>().is64Bit()) + Opc = X86::MOV8rr_NOREX; else - CommonRC = 0; - } - - if (CommonRC) { - unsigned Opc; - if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) { - Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32RegClass || - CommonRC == &X86::GR32_NOSPRegClass) { - Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16RegClass) { - Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8RegClass) { - // Copying to or from a physical H register on x86-64 requires a NOREX - // move. Otherwise use a normal move. 
- if ((isHReg(DestReg) || isHReg(SrcReg)) && - TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8rr_NOREX; - else - Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_ABCDRegClass) { - Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32_ABCDRegClass) { - Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16_ABCDRegClass) { - Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { - if (TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8rr_NOREX; - else - Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_NOREXRegClass || - CommonRC == &X86::GR64_NOREX_NOSPRegClass) { - Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32_NOREXRegClass) { - Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16_NOREXRegClass) { - Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8_NOREXRegClass) { - Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_TCRegClass) { - Opc = X86::MOV64rr_TC; - } else if (CommonRC == &X86::GR32_TCRegClass) { - Opc = X86::MOV32rr_TC; - } else if (CommonRC == &X86::RFP32RegClass) { - Opc = X86::MOV_Fp3232; - } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { - Opc = X86::MOV_Fp6464; - } else if (CommonRC == &X86::RFP80RegClass) { - Opc = X86::MOV_Fp8080; - } else if (CommonRC == &X86::FR32RegClass) { - Opc = X86::FsMOVAPSrr; - } else if (CommonRC == &X86::FR64RegClass) { - Opc = X86::FsMOVAPDrr; - } else if (CommonRC == &X86::VR128RegClass) { - Opc = X86::MOVAPSrr; - } else if (CommonRC == &X86::VR64RegClass) { - Opc = X86::MMX_MOVQ64rr; - } else { - return false; - } - BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); - return true; + } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOVAPSrr; + else if (X86::VR64RegClass.contains(DestReg, SrcReg)) + Opc = X86::MMX_MOVQ64rr; + + if (Opc) { + BuildMI(MBB, MI, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } // Moving EFLAGS to / from another register requires a push and a pop. - if (SrcRC == &X86::CCRRegClass) { - if (SrcReg != X86::EFLAGS) - return false; - if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { + if (SrcReg == X86::EFLAGS) { + if (X86::GR64RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); - return true; - } else if (DestRC == &X86::GR32RegClass || - DestRC == &X86::GR32_NOSPRegClass) { + return; + } else if (X86::GR32RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); - return true; + return; } - } else if (DestRC == &X86::CCRRegClass) { - if (DestReg != X86::EFLAGS) - return false; - if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); + } + if (DestReg == X86::EFLAGS) { + if (X86::GR64RegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSH64r)) + .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF64)); - return true; - } else if (SrcRC == &X86::GR32RegClass || - DestRC == &X86::GR32_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); + return; + } else if (X86::GR32RegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSH32r)) + .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF32)); - return true; - } - } - - // Moving from ST(0) turns into FpGET_ST0_32 etc. 
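The net effect of the copyRegToReg to copyPhysReg rewrite visible here: callers hand over two physical registers and a kill flag rather than two register classes, and an uncopyable pair is now a hard error instead of a false return. A before/after sketch of a call site (an assumed example, not taken from this patch):

    // Before: fallible, needed both register classes.
    //   if (!TII->copyRegToReg(MBB, MI, DstReg, SrcReg, DstRC, SrcRC, DL))
    //     ... fall back ...
    // After: infallible; the opcode is chosen from the registers themselves.
    TII->copyPhysReg(MBB, MI, DL, DstReg, SrcReg, /*KillSrc=*/true);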
- if (SrcRC == &X86::RSTRegClass) { - // Copying from ST(0)/ST(1). - if (SrcReg != X86::ST0 && SrcReg != X86::ST1) - // Can only copy from ST(0)/ST(1) right now - return false; - bool isST0 = SrcReg == X86::ST0; - unsigned Opc; - if (DestRC == &X86::RFP32RegClass) - Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32; - else if (DestRC == &X86::RFP64RegClass) - Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64; - else { - if (DestRC != &X86::RFP80RegClass) - return false; - Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80; + return; } - BuildMI(MBB, MI, DL, get(Opc), DestReg); - return true; } - // Moving to ST(0) turns into FpSET_ST0_32 etc. - if (DestRC == &X86::RSTRegClass) { - // Copying to ST(0) / ST(1). - if (DestReg != X86::ST0 && DestReg != X86::ST1) - // Can only copy to TOS right now - return false; - bool isST0 = DestReg == X86::ST0; - unsigned Opc; - if (SrcRC == &X86::RFP32RegClass) - Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32; - else if (SrcRC == &X86::RFP64RegClass) - Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64; - else { - if (SrcRC != &X86::RFP80RegClass) - return false; - Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80; - } - BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg); - return true; - } - - // Not yet supported! - return false; + DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) + << " to " << RI.getName(DestReg) << '\n'); + llvm_unreachable("Cannot emit physreg copy instruction"); } -static unsigned getStoreRegOpcode(unsigned SrcReg, - const TargetRegisterClass *RC, - bool isStackAligned, - TargetMachine &TM) { - unsigned Opc = 0; - if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { - Opc = X86::MOV64mr; - } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { - Opc = X86::MOV32mr; - } else if (RC == &X86::GR16RegClass) { - Opc = X86::MOV16mr; - } else if (RC == &X86::GR8RegClass) { +static unsigned getLoadStoreRegOpcode(unsigned Reg, + const TargetRegisterClass *RC, + bool isStackAligned, + const TargetMachine &TM, + bool load) { + switch (RC->getID()) { + default: + llvm_unreachable("Unknown regclass"); + case X86::GR64RegClassID: + case X86::GR64_NOSPRegClassID: + return load ? X86::MOV64rm : X86::MOV64mr; + case X86::GR32RegClassID: + case X86::GR32_NOSPRegClassID: + case X86::GR32_ADRegClassID: + return load ? X86::MOV32rm : X86::MOV32mr; + case X86::GR16RegClassID: + return load ? X86::MOV16rm : X86::MOV16mr; + case X86::GR8RegClassID: // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. - if (isHReg(SrcReg) && + if (isHReg(Reg) && TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8mr_NOREX; + return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else - Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_ABCDRegClass) { - Opc = X86::MOV64mr; - } else if (RC == &X86::GR32_ABCDRegClass) { - Opc = X86::MOV32mr; - } else if (RC == &X86::GR16_ABCDRegClass) { - Opc = X86::MOV16mr; - } else if (RC == &X86::GR8_ABCD_LRegClass) { - Opc = X86::MOV8mr; - } else if (RC == &X86::GR8_ABCD_HRegClass) { + return load ? X86::MOV8rm : X86::MOV8mr; + case X86::GR64_ABCDRegClassID: + return load ? X86::MOV64rm : X86::MOV64mr; + case X86::GR32_ABCDRegClassID: + return load ? X86::MOV32rm : X86::MOV32mr; + case X86::GR16_ABCDRegClassID: + return load ? X86::MOV16rm : X86::MOV16mr; + case X86::GR8_ABCD_LRegClassID: + return load ? 
X86::MOV8rm : X86::MOV8mr; + case X86::GR8_ABCD_HRegClassID: if (TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8mr_NOREX; + return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else - Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_NOREXRegClass || - RC == &X86::GR64_NOREX_NOSPRegClass) { - Opc = X86::MOV64mr; - } else if (RC == &X86::GR32_NOREXRegClass) { - Opc = X86::MOV32mr; - } else if (RC == &X86::GR16_NOREXRegClass) { - Opc = X86::MOV16mr; - } else if (RC == &X86::GR8_NOREXRegClass) { - Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_TCRegClass) { - Opc = X86::MOV64mr_TC; - } else if (RC == &X86::GR32_TCRegClass) { - Opc = X86::MOV32mr_TC; - } else if (RC == &X86::RFP80RegClass) { - Opc = X86::ST_FpP80m; // pops - } else if (RC == &X86::RFP64RegClass) { - Opc = X86::ST_Fp64m; - } else if (RC == &X86::RFP32RegClass) { - Opc = X86::ST_Fp32m; - } else if (RC == &X86::FR32RegClass) { - Opc = X86::MOVSSmr; - } else if (RC == &X86::FR64RegClass) { - Opc = X86::MOVSDmr; - } else if (RC == &X86::VR128RegClass) { + return load ? X86::MOV8rm : X86::MOV8mr; + case X86::GR64_NOREXRegClassID: + case X86::GR64_NOREX_NOSPRegClassID: + return load ? X86::MOV64rm : X86::MOV64mr; + case X86::GR32_NOREXRegClassID: + return load ? X86::MOV32rm : X86::MOV32mr; + case X86::GR16_NOREXRegClassID: + return load ? X86::MOV16rm : X86::MOV16mr; + case X86::GR8_NOREXRegClassID: + return load ? X86::MOV8rm : X86::MOV8mr; + case X86::GR64_TCRegClassID: + return load ? X86::MOV64rm_TC : X86::MOV64mr_TC; + case X86::GR32_TCRegClassID: + return load ? X86::MOV32rm_TC : X86::MOV32mr_TC; + case X86::RFP80RegClassID: + return load ? X86::LD_Fp80m : X86::ST_FpP80m; + case X86::RFP64RegClassID: + return load ? X86::LD_Fp64m : X86::ST_Fp64m; + case X86::RFP32RegClassID: + return load ? X86::LD_Fp32m : X86::ST_Fp32m; + case X86::FR32RegClassID: + return load ? X86::MOVSSrm : X86::MOVSSmr; + case X86::FR64RegClassID: + return load ? X86::MOVSDrm : X86::MOVSDmr; + case X86::VR128RegClassID: // If stack is realigned we can use aligned stores. - Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr; - } else if (RC == &X86::VR64RegClass) { - Opc = X86::MMX_MOVQ64mr; - } else { - llvm_unreachable("Unknown regclass"); + if (isStackAligned) + return load ? X86::MOVAPSrm : X86::MOVAPSmr; + else + return load ? X86::MOVUPSrm : X86::MOVUPSmr; + case X86::VR64RegClassID: + return load ?
X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; } +} + +static unsigned getStoreRegOpcode(unsigned SrcReg, + const TargetRegisterClass *RC, + bool isStackAligned, + TargetMachine &TM) { + return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false); +} - return Opc; + +static unsigned getLoadRegOpcode(unsigned DestReg, + const TargetRegisterClass *RC, + bool isStackAligned, + const TargetMachine &TM) { + return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true); } void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -2150,7 +2060,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = (*MMOBegin)->getAlignment() >= 16; + bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -2161,72 +2071,6 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, NewMIs.push_back(MIB); } -static unsigned getLoadRegOpcode(unsigned DestReg, - const TargetRegisterClass *RC, - bool isStackAligned, - const TargetMachine &TM) { - unsigned Opc = 0; - if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { - Opc = X86::MOV64rm; - } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { - Opc = X86::MOV32rm; - } else if (RC == &X86::GR16RegClass) { - Opc = X86::MOV16rm; - } else if (RC == &X86::GR8RegClass) { - // Copying to or from a physical H register on x86-64 requires a NOREX - // move. Otherwise use a normal move. - if (isHReg(DestReg) && - TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8rm_NOREX; - else - Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_ABCDRegClass) { - Opc = X86::MOV64rm; - } else if (RC == &X86::GR32_ABCDRegClass) { - Opc = X86::MOV32rm; - } else if (RC == &X86::GR16_ABCDRegClass) { - Opc = X86::MOV16rm; - } else if (RC == &X86::GR8_ABCD_LRegClass) { - Opc = X86::MOV8rm; - } else if (RC == &X86::GR8_ABCD_HRegClass) { - if (TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8rm_NOREX; - else - Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_NOREXRegClass || - RC == &X86::GR64_NOREX_NOSPRegClass) { - Opc = X86::MOV64rm; - } else if (RC == &X86::GR32_NOREXRegClass) { - Opc = X86::MOV32rm; - } else if (RC == &X86::GR16_NOREXRegClass) { - Opc = X86::MOV16rm; - } else if (RC == &X86::GR8_NOREXRegClass) { - Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_TCRegClass) { - Opc = X86::MOV64rm_TC; - } else if (RC == &X86::GR32_TCRegClass) { - Opc = X86::MOV32rm_TC; - } else if (RC == &X86::RFP80RegClass) { - Opc = X86::LD_Fp80m; - } else if (RC == &X86::RFP64RegClass) { - Opc = X86::LD_Fp64m; - } else if (RC == &X86::RFP32RegClass) { - Opc = X86::LD_Fp32m; - } else if (RC == &X86::FR32RegClass) { - Opc = X86::MOVSSrm; - } else if (RC == &X86::FR64RegClass) { - Opc = X86::MOVSDrm; - } else if (RC == &X86::VR128RegClass) { - // If stack is realigned we can use aligned loads. - Opc = isStackAligned ? 
X86::MOVAPSrm : X86::MOVUPSrm; - } else if (RC == &X86::VR64RegClass) { - Opc = X86::MMX_MOVQ64rm; - } else { - llvm_unreachable("Unknown regclass"); - } - - return Opc; -} void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -2246,7 +2090,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = (*MMOBegin)->getAlignment() >= 16; + bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); @@ -2277,18 +2121,17 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); if (Reg == FPReg) // X86RegisterInfo::emitPrologue will handle spilling of frame register. continue; - if (RegClass != &X86::VR128RegClass && !isWin64) { + if (!X86::VR128RegClass.contains(Reg) && !isWin64) { CalleeFrameSize += SlotSize; BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); } else { - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass, - &RI); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), + &X86::VR128RegClass, &RI); } } @@ -2315,11 +2158,11 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (Reg == FPReg) // X86RegisterInfo::emitEpilogue will handle restoring of frame register. continue; - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass != &X86::VR128RegClass && !isWin64) { + if (!X86::VR128RegClass.contains(Reg) && !isWin64) { BuildMI(MBB, MI, DL, get(Opc), Reg); } else { - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI); + loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + &X86::VR128RegClass, &RI); } } return true; @@ -2492,7 +2335,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } // No fusion - if (PrintFailedFusing) + if (PrintFailedFusing && !MI->isCopy()) dbgs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; } @@ -2610,7 +2453,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } else if (Ops.size() != 1) return NULL; - SmallVector<MachineOperand,X86AddrNumOperands> MOs; + SmallVector<MachineOperand,X86::AddrNumOperands> MOs; switch (LoadMI->getOpcode()) { case X86::V_SET0PS: case X86::V_SET0PD: @@ -2632,7 +2475,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (TM.getSubtarget<X86Subtarget>().is64Bit()) PICBase = X86::RIP; else - // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); + // FIXME: PICBase = getGlobalBaseReg(&MF); // This doesn't work for several reasons. // 1. GlobalBaseReg may have been spilled. // 2. It may not be live at MI. @@ -2664,7 +2507,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, default: { // Folding a normal load. Just copy the load's address operands. 
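The repeated change to isAligned in storeRegToAddr and loadRegFromAddr above, and the bail-outs added to the unfolding paths below, all encode one rule: an instruction with no memoperands gives no alignment guarantee. Sketch of the idiom:

    // Unknown alignment => conservatively assume unaligned. For VR128 on
    // subtargets where unaligned SSE accesses are slow, the unfolders
    // instead refuse to unfold rather than emit MOVUPS.
    bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;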
unsigned NumOps = LoadMI->getDesc().getNumOperands(); - for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) + for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) MOs.push_back(LoadMI->getOperand(i)); break; } @@ -2727,7 +2570,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, if (I != OpcodeTablePtr->end()) return true; } - return false; + return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops); } bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, @@ -2751,13 +2594,20 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, const TargetInstrDesc &TID = get(Opc); const TargetOperandInfo &TOI = TID.OpInfo[Index]; const TargetRegisterClass *RC = TOI.getRegClass(&RI); - SmallVector<MachineOperand, X86AddrNumOperands> AddrOps; + if (!MI->hasOneMemOperand() && + RC == &X86::VR128RegClass && + !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + // Without memoperands, loadRegFromAddr and storeRegToStackSlot will + // conservatively assume the address is unaligned. That's bad for + // performance. + return false; + SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps; SmallVector<MachineOperand,2> BeforeOps; SmallVector<MachineOperand,2> AfterOps; SmallVector<MachineOperand,4> ImpOps; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); - if (i >= Index && i < Index + X86AddrNumOperands) + if (i >= Index && i < Index + X86::AddrNumOperands) AddrOps.push_back(Op); else if (Op.isReg() && Op.isImplicit()) ImpOps.push_back(Op); @@ -2776,7 +2626,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); if (UnfoldStore) { // Address operands cannot be marked isKill. - for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { + for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) { MachineOperand &MO = NewMIs[0]->getOperand(i); if (MO.isReg()) MO.setIsKill(false); @@ -2873,7 +2723,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned NumOps = N->getNumOperands(); for (unsigned i = 0; i != NumOps-1; ++i) { SDValue Op = N->getOperand(i); - if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands) + if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands) AddrOps.push_back(Op); else if (i < Index-NumDefs) BeforeOps.push_back(Op); @@ -2892,7 +2742,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, MachineInstr::mmo_iterator> MMOs = MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), cast<MachineSDNode>(N)->memoperands_end()); - bool isAligned = (*MMOs.first)->getAlignment() >= 16; + if (!(*MMOs.first) && + RC == &X86::VR128RegClass && + !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + // Do not introduce a slow unaligned load. 
+ return false; + bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); @@ -2929,7 +2784,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, MachineInstr::mmo_iterator> MMOs = MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), cast<MachineSDNode>(N)->memoperands_end()); - bool isAligned = (*MMOs.first)->getAlignment() >= 16; + if (!(*MMOs.first) && + RC == &X86::VR128RegClass && + !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + // Do not introduce a slow unaligned store. + return false; + bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), dl, MVT::Other, @@ -3065,16 +2925,16 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, EVT VT = Load1->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: { + default: // XMM registers. In 64-bit mode we can be a bit more aggressive since we // have 16 of them to play with. if (TM.getSubtargetImpl()->is64Bit()) { if (NumLoads >= 3) return false; - } else if (NumLoads) + } else if (NumLoads) { return false; + } break; - } case MVT::i8: case MVT::i16: case MVT::i32: @@ -3083,6 +2943,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, case MVT::f64: if (NumLoads) return false; + break; } return true; @@ -3123,6 +2984,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: + case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: return true; } return false; @@ -3194,7 +3057,7 @@ unsigned X86InstrInfo::determineREX(const MachineInstr &MI) { case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: case X86II::MRMDestMem: { - unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); + unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); i = isTwoAddr ? 
1 : 0; if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) REX |= 1 << 2; @@ -3546,7 +3409,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, case X86II::MRMDestMem: { ++FinalSize; FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += X86AddrNumOperands + 1; + CurOp += X86::AddrNumOperands + 1; if (CurOp != NumOps) { ++CurOp; FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); @@ -3565,16 +3428,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, break; case X86II::MRMSrcMem: { - int AddrOperands; - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - AddrOperands = X86AddrNumOperands - 1; // No segment register - else - AddrOperands = X86AddrNumOperands; - ++FinalSize; FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode); - CurOp += AddrOperands + 1; + CurOp += X86::AddrNumOperands + 1; if (CurOp != NumOps) { ++CurOp; FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); @@ -3628,7 +3484,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, ++FinalSize; FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += X86AddrNumOperands; + CurOp += X86::AddrNumOperands; if (CurOp != NumOps) { const MachineOperand &MO = MI.getOperand(CurOp++); @@ -3694,6 +3550,8 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. /// +/// TODO: Eliminate this and move the code to X86MachineFunctionInfo. +/// unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { assert(!TM.getSubtarget<X86Subtarget>().is64Bit() && "X86-64 PIC uses RIP relative addressing"); @@ -3703,30 +3561,10 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { if (GlobalBaseReg != 0) return GlobalBaseReg; - // Insert the set of GlobalBaseReg into the first MBB of the function - MachineBasicBlock &FirstMBB = MF->front(); - MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - DebugLoc DL = FirstMBB.findDebugLoc(MBBI); + // Create the register. The code to initialize it is inserted + // later, by the CGBR pass (below). MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); - - const TargetInstrInfo *TII = TM.getInstrInfo(); - // Operand of MovePCtoStack is completely ignored by asm printer. It's - // only used in JIT code emission as displacement to pc. - BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); - - // If we're using vanilla 'GOT' PIC style, we should use relative addressing - // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. - if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) { - GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); - // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register - BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) - .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", - X86II::MO_GOT_ABSOLUTE_ADDRESS); - } else { - GlobalBaseReg = PC; - } - + GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); X86FI->setGlobalBaseReg(GlobalBaseReg); return GlobalBaseReg; } @@ -3784,3 +3622,65 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } +namespace { + /// CGBR - Create Global Base Reg pass. This initializes the PIC + /// global base register for x86-32. 
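The CGBR pass declared next emits the PIC base materialization that getGlobalBaseReg (above) used to build inline; getGlobalBaseReg now only allocates the virtual register. Creation goes through llvm::createGlobalBaseRegPass(), defined at the end of this hunk. How the X86 target schedules the pass is not shown in this diff, so the following wiring is an assumption:

    // Hypothetical call site during X86 codegen pipeline setup (not part
    // of this patch).
    PM.add(createGlobalBaseRegPass());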
+ struct CGBR : public MachineFunctionPass { + static char ID; + CGBR() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + const X86TargetMachine *TM = + static_cast<const X86TargetMachine *>(&MF.getTarget()); + + assert(!TM->getSubtarget<X86Subtarget>().is64Bit() && + "X86-64 PIC uses RIP relative addressing"); + + // Only emit a global base reg in PIC mode. + if (TM->getRelocationModel() != Reloc::PIC_) + return false; + + // Insert the set of GlobalBaseReg into the first MBB of the function + MachineBasicBlock &FirstMBB = MF.front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc DL = FirstMBB.findDebugLoc(MBBI); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const X86InstrInfo *TII = TM->getInstrInfo(); + + unsigned PC; + if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) + PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); + else + PC = TII->getGlobalBaseReg(&MF); + + // Operand of MovePCtoStack is completely ignored by asm printer. It's + // only used in JIT code emission as displacement to pc. + BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); + + // If we're using vanilla 'GOT' PIC style, we should use relative addressing + // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. + if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) { + unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF); + // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register + BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) + .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", + X86II::MO_GOT_ABSOLUTE_ADDRESS); + } + + return true; + } + + virtual const char *getPassName() const { + return "X86 PIC Global Base Reg Initialization"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char CGBR::ID = 0; +FunctionPass* +llvm::createGlobalBaseRegPass() { return new CGBR(); } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 62d7c74484d5..f762b5827075 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -24,6 +24,24 @@ namespace llvm { class X86TargetMachine; namespace X86 { + // Enums for memory operand decoding. Each memory operand is represented with + // a 5 operand sequence in the form: + // [BaseReg, ScaleAmt, IndexReg, Disp, Segment] + // These enums help decode this. + enum { + AddrBaseReg = 0, + AddrScaleAmt = 1, + AddrIndexReg = 2, + AddrDisp = 3, + + /// AddrSegmentReg - The operand # of the segment in the memory operand. + AddrSegmentReg = 4, + + /// AddrNumOperands - Total number of operands in a memory reference. + AddrNumOperands = 5 + }; + + // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. enum CondCode { @@ -173,7 +191,19 @@ namespace X86II { /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", /// which is a PIC-base-relative reference to a hidden dyld lazy pointer /// stub. - MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE + MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, + + /// MO_TLVP - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// This is the TLS offset for the Darwin TLS mechanism. + MO_TLVP, + + /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate + /// is some TLS offset from the picbase. + /// + /// This is the 32-bit TLS offset for Darwin TLS in PIC mode. 
+ MO_TLVP_PIC_BASE }; } @@ -203,6 +233,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) { case X86II::MO_PIC_BASE_OFFSET: // Darwin local global. case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global. case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global. + case X86II::MO_TLVP: // ??? Pretty sure.. return true; default: return false; @@ -347,9 +378,10 @@ namespace X86II { Imm8 = 1 << ImmShift, Imm8PCRel = 2 << ImmShift, Imm16 = 3 << ImmShift, - Imm32 = 4 << ImmShift, - Imm32PCRel = 5 << ImmShift, - Imm64 = 6 << ImmShift, + Imm16PCRel = 4 << ImmShift, + Imm32 = 5 << ImmShift, + Imm32PCRel = 6 << ImmShift, + Imm64 = 7 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. @@ -403,28 +435,47 @@ namespace X86II { SSEDomainShift = 22, OpcodeShift = 24, - OpcodeMask = 0xFF << OpcodeShift + OpcodeMask = 0xFF << OpcodeShift, + + //===------------------------------------------------------------------===// + // VEX - The opcode prefix used by AVX instructions + VEX = 1ULL << 32, + + // VEX_W - Has an opcode-specific function, but is used in the same + // way as REX_W is for regular SSE instructions. + VEX_W = 1ULL << 33, + + // VEX_4V - Used to specify an additional AVX/SSE register. Several two-address + // instructions in SSE are represented as three-address ones in AVX, + // and the additional register is encoded in the VEX_VVVV prefix. + VEX_4V = 1ULL << 34, + + // VEX_I8IMM - Specifies that the last register used in an AVX instruction + // must be encoded in the i8 immediate field. This usually happens in + // instructions with 4 operands. + VEX_I8IMM = 1ULL << 35 }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the // specified machine instruction. // - static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) { + static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { return TSFlags >> X86II::OpcodeShift; } - static inline bool hasImm(unsigned TSFlags) { + static inline bool hasImm(uint64_t TSFlags) { return (TSFlags & X86II::ImmMask) != 0; } /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. - static inline unsigned getSizeOfImm(unsigned TSFlags) { + static inline unsigned getSizeOfImm(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { default: assert(0 && "Unknown immediate size"); case X86II::Imm8: case X86II::Imm8PCRel: return 1; - case X86II::Imm16: return 2; + case X86II::Imm16: + case X86II::Imm16PCRel: return 2; case X86II::Imm32: case X86II::Imm32PCRel: return 4; case X86II::Imm64: return 8; @@ -433,23 +484,77 @@ namespace X86II { /// isImmPCRel - Return true if the immediate of the specified instruction's /// TSFlags indicates that it is pc relative.
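Two threads tie these header changes together. First, the new VEX bits sit at positions 32 through 35, which is why the TSFlags accessors widen from unsigned to uint64_t: a 32-bit TSFlags would silently drop them. Second, the X86::Addr* enums introduced above give consumers a uniform way to address the five parts of a memory reference once they know where it starts (getMemoryOperandNo, defined just below, computes that). An illustrative sketch, not patch code; MemOp stands for the index of the first address operand:

    uint64_t TSFlags = MI.getDesc().TSFlags;         // 64-bit: VEX bits live above bit 31
    bool IsVEX    = (TSFlags & X86II::VEX) != 0;     // bit 32
    bool HasVEX4V = (TSFlags & X86II::VEX_4V) != 0;  // bit 34
    const MachineOperand &Base = MI.getOperand(MemOp + X86::AddrBaseReg);
    const MachineOperand &Disp = MI.getOperand(MemOp + X86::AddrDisp);
    const MachineOperand &Seg  = MI.getOperand(MemOp + X86::AddrSegmentReg);
    // The first operand past the reference is at MemOp + X86::AddrNumOperands.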
- static inline unsigned isImmPCRel(unsigned TSFlags) { + static inline unsigned isImmPCRel(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { - default: assert(0 && "Unknown immediate size"); - case X86II::Imm8PCRel: - case X86II::Imm32PCRel: - return true; - case X86II::Imm8: - case X86II::Imm16: - case X86II::Imm32: - case X86II::Imm64: - return false; + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm16PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; + } + } + + /// getMemoryOperandNo - The function returns the MCInst operand # for the + /// first field of the memory operand. If the instruction doesn't have a + /// memory operand, this returns -1. + /// + /// Note that this ignores tied operands. If there is a tied register which + /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only + /// counted as one operand. + /// + static inline int getMemoryOperandNo(uint64_t TSFlags) { + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form"); + default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!"); + case X86II::Pseudo: + case X86II::RawFrm: + case X86II::AddRegFrm: + case X86II::MRMDestReg: + case X86II::MRMSrcReg: + return -1; + case X86II::MRMDestMem: + return 0; + case X86II::MRMSrcMem: { + bool HasVEX_4V = TSFlags & X86II::VEX_4V; + unsigned FirstMemOp = 1; + if (HasVEX_4V) + ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). + + // FIXME: Maybe lea should have its own form? This is a horrible hack. + //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + // Opcode == X86::LEA16r || Opcode == X86::LEA32r) + return FirstMemOp; } - } + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + return -1; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + return 0; + case X86II::MRM_C1: + case X86II::MRM_C2: + case X86II::MRM_C3: + case X86II::MRM_C4: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + case X86II::MRM_F8: + case X86II::MRM_F9: + return -1; + } + } } -const int X86AddrNumOperands = 5; - inline static bool isScale(const MachineOperand &MO) { return MO.isImm() && (MO.getImm() == 1 || MO.getImm() == 2 || @@ -555,7 +660,7 @@ public: void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo &TRI) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. 
When this flag is set, the target @@ -585,13 +690,12 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0d59c42dd164..1efef5a80b1b 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -72,6 +72,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>; def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; @@ -182,6 +184,9 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void, [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + +def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, + []>; //===----------------------------------------------------------------------===// // X86 Operand Definitions. @@ -197,13 +202,9 @@ def X86MemAsmOperand : AsmOperandClass { let Name = "Mem"; let SuperClasses = []; } -def X86NoSegMemAsmOperand : AsmOperandClass { - let Name = "NoSegMem"; - let SuperClasses = [X86MemAsmOperand]; -} def X86AbsMemAsmOperand : AsmOperandClass { let Name = "AbsMem"; - let SuperClasses = [X86NoSegMemAsmOperand]; + let SuperClasses = [X86MemAsmOperand]; } class X86MemOperand<string printMethod> : Operand<iPTR> { let PrintMethod = printMethod; @@ -226,7 +227,7 @@ def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; -//def f256mem : X86MemOperand<"printf256mem">; +def f256mem : X86MemOperand<"printf256mem">; // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. @@ -245,15 +246,11 @@ def i32mem_TC : Operand<i32> { let ParserMatchClass = X86MemAsmOperand; } -def lea32mem : Operand<i32> { - let PrintMethod = "printlea32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); - let ParserMatchClass = X86NoSegMemAsmOperand; -} let ParserMatchClass = X86AbsMemAsmOperand, PrintMethod = "print_pcrel_imm" in { def i32imm_pcrel : Operand<i32>; +def i16imm_pcrel : Operand<i16>; def offset8 : Operand<i64>; def offset16 : Operand<i64>; @@ -283,26 +280,31 @@ class ImmSExtAsmOperandClass : AsmOperandClass { // 64-bit immediates, but for a 16-bit target value we want to accept both "-1" // (which will be a -1ULL), and "0xFF" (-1 in 16-bits). 
-// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF] +// [0, 0x7FFFFFFF] | +// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF] def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti64i32"; } -// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | +// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti16i8"; let SuperClasses = [ImmSExti64i32AsmOperand]; } -// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | +// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti32i8"; } -// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +// [0, 0x0000007F] | +// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti64i8"; - let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand]; + let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, + ImmSExti64i32AsmOperand]; } // A couple of more descriptive operand definitions. @@ -321,10 +323,10 @@ def i32i8imm : Operand<i32> { // Define X86 specific addressing mode. def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>; -def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr", +def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr", [add, sub, mul, X86mul_imm, shl, or, frameindex], []>; -def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr", +def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr", [tglobaltlsaddr], []>; //===----------------------------------------------------------------------===// @@ -704,6 +706,12 @@ let isCall = 1 in "lcall{w}\t{*}$dst", []>, OpSize; def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), "lcall{l}\t{*}$dst", []>; + + // callw for 16 bit code for the assembler. + let isAsmParserOnly = 1 in + def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, + (outs), (ins i16imm_pcrel:$dst, variable_ops), + "callw\t$dst", []>, OpSize; } // Constructing a stack frame. @@ -737,18 +745,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in "jmp\t$dst # TAILCALL", []>; def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), - "jmp{l}\t{*}$dst # TAILCALL", - []>; + "", []>; // FIXME: Remove encoding when JIT is dead. let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; - - // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL - // marker on instructions, while still being able to relax. 
- let isCodeGenOnly = 1 in { - def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), - "jmp\t$dst # TAILCALL", []>; - } } //===----------------------------------------------------------------------===// @@ -815,7 +815,18 @@ def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, Requires<[In32BitMode]>; } -let isTwoAddress = 1 in // GR32 = bswap GR32 +let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], + mayLoad=1, neverHasSideEffects=1 in { +def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>, + Requires<[In32BitMode]>; +} +let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], + mayStore=1, neverHasSideEffects=1 in { +def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>, + Requires<[In32BitMode]>; +} + +let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "bswap{l}\t$dst", @@ -855,11 +866,11 @@ def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, - (outs GR16:$dst), (ins lea32mem:$src), + (outs GR16:$dst), (ins i32mem:$src), "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; let isReMaterializable = 1 in def LEA32r : I<0x8D, MRMSrcMem, - (outs GR32:$dst), (ins lea32mem:$src), + (outs GR32:$dst), (ins i32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; @@ -1239,7 +1250,7 @@ def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), //===----------------------------------------------------------------------===// // Two address Instructions. // -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { // Conditional moves let Uses = [EFLAGS] in { @@ -1640,7 +1651,7 @@ def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32] // i8 register pressure. Note that CMOV_GR8 is conservatively considered to // clobber EFLAGS, because if one of the operands is zero, the expansion // could involve an xor. 
-let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in { +let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in { def CMOV_GR8 : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond), "#CMOV_GR8 PSEUDO!", @@ -1659,86 +1670,106 @@ def CMOV_GR16 : I<0, Pseudo, [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>; def CMOV_RFP32 : I<0, Pseudo, - (outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2, i8imm:$cond), + (outs RFP32:$dst), + (ins RFP32:$src1, RFP32:$src2, i8imm:$cond), "#CMOV_RFP32 PSEUDO!", - [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond, + [(set RFP32:$dst, + (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond, EFLAGS))]>; def CMOV_RFP64 : I<0, Pseudo, - (outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2, i8imm:$cond), + (outs RFP64:$dst), + (ins RFP64:$src1, RFP64:$src2, i8imm:$cond), "#CMOV_RFP64 PSEUDO!", - [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond, + [(set RFP64:$dst, + (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond, EFLAGS))]>; def CMOV_RFP80 : I<0, Pseudo, - (outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2, i8imm:$cond), + (outs RFP80:$dst), + (ins RFP80:$src1, RFP80:$src2, i8imm:$cond), "#CMOV_RFP80 PSEUDO!", - [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, + [(set RFP80:$dst, + (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, EFLAGS))]>; } // Predicates = [NoCMov] -} // UsesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] +} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] } // Uses = [EFLAGS] // unary instructions let CodeSize = 2 in { let Defs = [EFLAGS] in { -def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst", - [(set GR8:$dst, (ineg GR8:$src)), +def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), + "neg{b}\t$dst", + [(set GR8:$dst, (ineg GR8:$src1)), (implicit EFLAGS)]>; -def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst", - [(set GR16:$dst, (ineg GR16:$src)), +def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1), + "neg{w}\t$dst", + [(set GR16:$dst, (ineg GR16:$src1)), (implicit EFLAGS)]>, OpSize; -def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst", - [(set GR32:$dst, (ineg GR32:$src)), +def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), + "neg{l}\t$dst", + [(set GR32:$dst, (ineg GR32:$src1)), (implicit EFLAGS)]>; -let isTwoAddress = 0 in { - def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", + +let Constraints = "" in { + def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), + "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), (implicit EFLAGS)]>; - def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst", + def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), + "neg{w}\t$dst", [(store (ineg (loadi16 addr:$dst)), addr:$dst), (implicit EFLAGS)]>, OpSize; - def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst", + def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), + "neg{l}\t$dst", [(store (ineg (loadi32 addr:$dst)), addr:$dst), (implicit EFLAGS)]>; -} +} // Constraints = "" } // Defs = [EFLAGS] // Match xor -1 to not. Favors these over a move imm + xor to save code size. 
let AddedComplexity = 15 in { -def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst", - [(set GR8:$dst, (not GR8:$src))]>; -def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst", - [(set GR16:$dst, (not GR16:$src))]>, OpSize; -def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst", - [(set GR32:$dst, (not GR32:$src))]>; +def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), + "not{b}\t$dst", + [(set GR8:$dst, (not GR8:$src1))]>; +def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1), + "not{w}\t$dst", + [(set GR16:$dst, (not GR16:$src1))]>, OpSize; +def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), + "not{l}\t$dst", + [(set GR32:$dst, (not GR32:$src1))]>; } -let isTwoAddress = 0 in { - def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", +let Constraints = "" in { + def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), + "not{b}\t$dst", [(store (not (loadi8 addr:$dst)), addr:$dst)]>; - def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst", + def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), + "not{w}\t$dst", [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize; - def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst", + def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), + "not{l}\t$dst", [(store (not (loadi32 addr:$dst)), addr:$dst)]>; -} +} // Constraints = "" } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { let CodeSize = 2 in -def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>; +def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), + "inc{b}\t$dst", + [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), +def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>, + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize, Requires<[In32BitMode]>; -def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), +def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>, + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, Requires<[In32BitMode]>; } -let isTwoAddress = 0, CodeSize = 2 in { +let Constraints = "", CodeSize = 2 in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)]>; @@ -1750,23 +1781,24 @@ let isTwoAddress = 0, CodeSize = 2 in { [(store (add (loadi32 addr:$dst), 1), addr:$dst), (implicit EFLAGS)]>, Requires<[In32BitMode]>; -} +} // Constraints = "", CodeSize = 2 let CodeSize = 2 in -def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>; +def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), + "dec{b}\t$dst", + [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), +def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>, + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize, Requires<[In32BitMode]>; -def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), +def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>, + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, Requires<[In32BitMode]>; -} +} // isConvertibleToThreeAddress = 1, CodeSize = 1 -let isTwoAddress = 0, CodeSize = 2 in { +let Constraints = "", CodeSize = 2 in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)]>; @@ -1778,7 +1810,7 @@ let isTwoAddress = 0, CodeSize = 2 in { [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)]>, Requires<[In32BitMode]>; -} +} // Constraints = "", CodeSize = 2 } // Defs = [EFLAGS] // Logical operators... @@ -1857,7 +1889,7 @@ def AND32ri8 : Ii8<0x83, MRM4r, [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def AND8mr : I<0x20, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "and{b}\t{$src, $dst|$dst, $src}", @@ -1909,7 +1941,7 @@ let isTwoAddress = 0 in { def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src), "and{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y @@ -1983,7 +2015,7 @@ def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), "or{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "or{b}\t{$src, $dst|$dst, $src}", [(store (or (load addr:$dst), GR8:$src), addr:$dst), @@ -2025,7 +2057,7 @@ let isTwoAddress = 0 in { "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src), "or{l}\t{$src, %eax|%eax, $src}", []>; -} // isTwoAddress = 0 +} // Constraints = "" let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y @@ -2102,7 +2134,7 @@ def XOR32ri8 : Ii8<0x83, MRM6r, [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def XOR8mr : I<0x30, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "xor{b}\t{$src, $dst|$dst, $src}", @@ -2153,26 +2185,27 @@ let isTwoAddress = 0 in { "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src), "xor{l}\t{$src, %eax|%eax, $src}", []>; -} // isTwoAddress = 0 +} // Constraints = "" } // Defs = [EFLAGS] // Shift instructions let Defs = [EFLAGS] in { let Uses = [CL] in { -def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src), +def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), "shl{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (shl GR8:$src, CL))]>; -def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (shl GR8:$src1, CL))]>; +def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1), "shl{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize; -def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize; +def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1), 
"shl{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (shl GR32:$src, CL))]>; + [(set GR32:$dst, (shl GR32:$src1, CL))]>; } // Uses = [CL] def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "shl{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>; + let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "shl{w}\t{$src2, $dst|$dst, $src2}", @@ -2193,7 +2226,7 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), } // isConvertibleToThreeAddress = 1 -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), "shl{b}\t{%cl, $dst|$dst, CL}", @@ -2227,18 +2260,18 @@ let isTwoAddress = 0 in { def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), "shl{l}\t$dst", [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} +} // Constraints = "" let Uses = [CL] in { -def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src), +def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), "shr{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (srl GR8:$src, CL))]>; -def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (srl GR8:$src1, CL))]>; +def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1), "shr{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize; -def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize; +def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), "shr{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (srl GR32:$src, CL))]>; + [(set GR32:$dst, (srl GR32:$src1, CL))]>; } def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), @@ -2262,7 +2295,7 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), "shr{l}\t$dst", [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), "shr{b}\t{%cl, $dst|$dst, CL}", @@ -2296,18 +2329,18 @@ let isTwoAddress = 0 in { def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), "shr{l}\t$dst", [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} +} // Constraints = "" let Uses = [CL] in { -def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src), +def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), "sar{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (sra GR8:$src, CL))]>; -def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (sra GR8:$src1, CL))]>; +def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1), "sar{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize; -def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize; +def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), "sar{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (sra GR32:$src, CL))]>; + [(set GR32:$dst, (sra GR32:$src1, CL))]>; } def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), @@ -2332,7 +2365,7 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), "sar{l}\t$dst", [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), "sar{b}\t{%cl, $dst|$dst, CL}", @@ 
-2366,65 +2399,65 @@ let isTwoAddress = 0 in { def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst), "sar{l}\t$dst", [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} +} // Constraints = "" // Rotate instructions -def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), +def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { -def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), +def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t{%cl, $dst|$dst, CL}", []>; } -def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), +def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), +def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { -def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), +def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } -def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), +def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), +def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "rcl{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { -def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), +def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "rcl{l}\t{%cl, $dst|$dst, CL}", []>; } -def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), +def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), +def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), "rcr{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { -def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), +def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), "rcr{b}\t{%cl, $dst|$dst, CL}", []>; } -def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), +def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), +def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { -def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), +def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } -def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), +def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), +def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "rcr{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { -def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), +def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } -def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), +def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), "rcr{l}\t{$cnt, 
$dst|$dst, $cnt}", []>; -let isTwoAddress = 0 in { +let Constraints = "" in { def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), "rcl{b}\t{1, $dst|$dst, 1}", []>; def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), @@ -2464,19 +2497,19 @@ def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } -} +} // Constraints = "" // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { -def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), +def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "rol{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotl GR8:$src, CL))]>; -def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (rotl GR8:$src1, CL))]>; +def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "rol{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize; -def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize; +def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "rol{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotl GR32:$src, CL))]>; + [(set GR32:$dst, (rotl GR32:$src1, CL))]>; } def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), @@ -2501,7 +2534,7 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "rol{l}\t$dst", [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), "rol{b}\t{%cl, $dst|$dst, CL}", @@ -2535,18 +2568,18 @@ let isTwoAddress = 0 in { def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst), "rol{l}\t$dst", [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} +} // Constraints = "" let Uses = [CL] in { -def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), +def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotr GR8:$src, CL))]>; -def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (rotr GR8:$src1, CL))]>; +def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "ror{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize; -def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize; +def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "ror{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotr GR32:$src, CL))]>; + [(set GR32:$dst, (rotr GR32:$src1, CL))]>; } def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), @@ -2571,7 +2604,7 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "ror{l}\t$dst", [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), "ror{b}\t{%cl, $dst|$dst, CL}", @@ -2605,8 +2638,7 @@ let isTwoAddress = 0 in { def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst), "ror{l}\t$dst", [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} - +} // Constraints = "" // Double shift instructions (generalizations of rotate) @@ -2662,7 +2694,7 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg, TB, OpSize; } -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, 
GR32:$src2), "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", @@ -2708,7 +2740,7 @@ let isTwoAddress = 0 in { [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, (i8 imm:$src3)), addr:$dst)]>, TB, OpSize; -} +} // Constraints = "" } // Defs = [EFLAGS] @@ -2794,7 +2826,7 @@ def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst), (X86add_flag GR32:$src1, i32immSExt8:$src2))]>; } -let isTwoAddress = 0 in { +let Constraints = "" in { // Memory-Register Addition def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "add{b}\t{$src2, $dst|$dst, $src2}", @@ -2838,7 +2870,7 @@ let isTwoAddress = 0 in { "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src), "add{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" let Uses = [EFLAGS] in { let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y @@ -2900,7 +2932,7 @@ def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst), "adc{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "adc{b}\t{$src2, $dst|$dst, $src2}", [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>; @@ -2935,7 +2967,7 @@ let isTwoAddress = 0 in { "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src), "adc{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" } // Uses = [EFLAGS] // Register-Register Subtraction @@ -3007,7 +3039,7 @@ def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), [(set GR32:$dst, EFLAGS, (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { // Memory-Register Subtraction def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", @@ -3052,7 +3084,7 @@ let isTwoAddress = 0 in { "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src), "sub{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" let Uses = [EFLAGS] in { def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst), @@ -3068,7 +3100,7 @@ def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst), "sbb{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>; @@ -3103,7 +3135,7 @@ let isTwoAddress = 0 in { "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src), "sbb{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" let isCodeGenOnly = 1 in { def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), @@ -3811,6 +3843,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", // Thread Local Storage Instructions // +// ELF TLS Support // All calls clobber the non-callee saved registers. ESP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. 
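// Illustrative sketch (the def name is invented; not part of this patch):
// listing ESP in Uses makes a call-like pseudo read the stack pointer, so an
// assignment to ESP immediately before it cannot be deleted as dead code.
let Defs = [EAX, ECX, EDX, EFLAGS], Uses = [ESP] in
def EXAMPLE_TLS_CALL : I<0, Pseudo, (outs), (ins i32mem:$sym),
                         "# example call that reads ESP", []>,
                       Requires<[In32BitMode]>;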
@@ -3819,12 +3852,24 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in -def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym), +def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "leal\t$sym, %eax; " "call\t___tls_get_addr@PLT", [(X86tlsaddr tls32addr:$sym)]>, Requires<[In32BitMode]>; +// Darwin TLS Support +// For i386, the address of the thunk is passed on the stack; on return, the +// address of the variable is in %eax. %ecx is trashed during the function +// call. All other registers are preserved. +let Defs = [EAX, ECX], + Uses = [ESP], + usesCustomInserter = 1 in +def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLSCall_32", + [(X86TLSCall addr:$sym)]>, + Requires<[In32BitMode]>; + let AddedComplexity = 5, isCodeGenOnly = 1 in def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movl\t%gs:$src, $dst", @@ -4783,14 +4828,14 @@ def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>; // Patterns for nodes that do not produce flags, for instructions that do. // Increment reg. -def : Pat<(add GR8:$src , 1), (INC8r GR8:$src)>; -def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>; +def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>; // Decrement reg. -def : Pat<(add GR8:$src , -1), (DEC8r GR8:$src)>; -def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>; +def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>; // or reg/reg. def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 0952fc8d7dcb..6cf7ac83620e 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -513,30 +513,20 @@ def : Pat<(store (v4i16 VR64:$src), addr:$dst), (MMX_MOVQ64mr addr:$dst, VR64:$src)>; def : Pat<(store (v2i32 VR64:$src), addr:$dst), (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v2f32 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; def : Pat<(store (v1i64 VR64:$src), addr:$dst), (MMX_MOVQ64mr addr:$dst, VR64:$src)>; // Bit convert. 
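// All of these mappings share one shape: a bitconvert between two 64-bit MMX
// vector types is a pure reinterpretation of the same VR64 register, so each
// pattern selects to the unchanged source operand and no instruction is
// emitted. The v2f32 entries are deleted here, consistent with the other
// v2f32 pattern removals in this patch. General form (DstVT and SrcVT range
// over the remaining MMX vector types):
//   def : Pat<(DstVT (bitconvert (SrcVT VR64:$src))), (DstVT VR64:$src)>;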
def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>; def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 VR64:$src))), (v8i8 VR64:$src)>; def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>; def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>; def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 VR64:$src))), (v4i16 VR64:$src)>; def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>; def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v2f32 VR64:$src))), (v2i32 VR64:$src)>; def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>; def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1i64 VR64:$src))), (v2f32 VR64:$src)>; -def : Pat<(v2f32 (bitconvert (v2i32 VR64:$src))), (v2f32 VR64:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 VR64:$src))), (v2f32 VR64:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 VR64:$src))), (v2f32 VR64:$src)>; def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v2f32 VR64:$src))), (v1i64 VR64:$src)>; def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>; def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>; @@ -545,8 +535,6 @@ def : Pat<(v1i64 (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; def : Pat<(v2i32 (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v2f32 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; def : Pat<(v4i16 (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; def : Pat<(v8i8 (bitconvert (i64 GR64:$src))), @@ -555,8 +543,6 @@ def : Pat<(i64 (bitconvert (v1i64 VR64:$src))), (MMX_MOVD64from64rr VR64:$src)>; def : Pat<(i64 (bitconvert (v2i32 VR64:$src))), (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v2f32 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; def : Pat<(i64 (bitconvert (v4i16 VR64:$src))), (MMX_MOVD64from64rr VR64:$src)>; def : Pat<(i64 (bitconvert (v8i8 VR64:$src))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5580ba74e64e..ab0005b31bb8 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -15,322 +15,6 @@ //===----------------------------------------------------------------------===// -// SSE specific DAG Nodes. 
-//===----------------------------------------------------------------------===// - -def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, - SDTCisFP<0>, SDTCisInt<2> ]>; -def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, - SDTCisFP<1>, SDTCisVT<3, i8>]>; - -def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; -def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; -def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, - [SDNPCommutative, SDNPAssociative]>; -def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, - [SDNPCommutative, SDNPAssociative]>; -def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, - [SDNPCommutative, SDNPAssociative]>; -def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; -def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; -def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; -def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; -def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; -def X86pshufb : SDNode<"X86ISD::PSHUFB", - SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, - SDTCisSameAs<0,2>]>>; -def X86pextrb : SDNode<"X86ISD::PEXTRB", - SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; -def X86pextrw : SDNode<"X86ISD::PEXTRW", - SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; -def X86pinsrb : SDNode<"X86ISD::PINSRB", - SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, - SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; -def X86pinsrw : SDNode<"X86ISD::PINSRW", - SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, - SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; -def X86insrtps : SDNode<"X86ISD::INSERTPS", - SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, - SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; -def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", - SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; -def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, - [SDNPHasChain, SDNPMayLoad]>; -def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>; -def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>; -def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>; -def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>; -def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>; -def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>; -def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>; -def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>; -def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>; -def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; -def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>; -def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; - -def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, - SDTCisVT<1, v4f32>, - SDTCisVT<2, v4f32>]>; -def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; - -//===----------------------------------------------------------------------===// -// SSE Complex Patterns -//===----------------------------------------------------------------------===// - -// These are 'extloads' from a scalar to the low element of a vector, zeroing -// the top elements. These are used for the SSE 'ss' and 'sd' instruction -// forms. 
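// Usage sketch (the def name is invented; it mirrors the generated SSrm_Int
// forms further down): a scalar intrinsic's memory form takes one of these
// complex patterns as its source operand, letting isel fold the scalar
// extload into the instruction itself.
let Constraints = "$src1 = $dst" in
def EXAMPLE_ADDSSrm_Int : SSI<0x58, MRMSrcMem, (outs VR128:$dst),
                              (ins VR128:$src1, ssmem:$src2),
                              "addss\t{$src2, $dst|$dst, $src2}",
                              [(set VR128:$dst, (int_x86_sse_add_ss VR128:$src1,
                                                 sse_load_f32:$src2))]>;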
-def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [], - [SDNPHasChain, SDNPMayLoad]>; -def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [], - [SDNPHasChain, SDNPMayLoad]>; - -def ssmem : Operand<v4f32> { - let PrintMethod = "printf32mem"; - let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; -} -def sdmem : Operand<v2f64> { - let PrintMethod = "printf64mem"; - let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); - let ParserMatchClass = X86MemAsmOperand; -} - -//===----------------------------------------------------------------------===// -// SSE pattern fragments -//===----------------------------------------------------------------------===// - -def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; -def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; -def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; -def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; - -// Like 'store', but always requires vector alignment. -def alignedstore : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return cast<StoreSDNode>(N)->getAlignment() >= 16; -}]>; - -// Like 'load', but always requires vector alignment. -def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast<LoadSDNode>(N)->getAlignment() >= 16; -}]>; - -def alignedloadfsf32 : PatFrag<(ops node:$ptr), - (f32 (alignedload node:$ptr))>; -def alignedloadfsf64 : PatFrag<(ops node:$ptr), - (f64 (alignedload node:$ptr))>; -def alignedloadv4f32 : PatFrag<(ops node:$ptr), - (v4f32 (alignedload node:$ptr))>; -def alignedloadv2f64 : PatFrag<(ops node:$ptr), - (v2f64 (alignedload node:$ptr))>; -def alignedloadv4i32 : PatFrag<(ops node:$ptr), - (v4i32 (alignedload node:$ptr))>; -def alignedloadv2i64 : PatFrag<(ops node:$ptr), - (v2i64 (alignedload node:$ptr))>; - -// Like 'load', but uses special alignment checks suitable for use in -// memory operands in most SSE instructions, which are required to -// be naturally aligned on some targets but not on others. If the subtarget -// allows unaligned accesses, match any load, though this may require -// setting a feature bit in the processor (on startup, for example). -// Opteron 10h and later implement such a feature. -def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return Subtarget->hasVectorUAMem() - || cast<LoadSDNode>(N)->getAlignment() >= 16; -}]>; - -def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; -def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; -def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; -def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; -def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; -def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; -def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; - -// SSSE3 uses MMX registers for some instructions. They aren't aligned on a -// 16-byte boundary. 
-// FIXME: 8 byte alignment for mmx reads is not required -def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return cast<LoadSDNode>(N)->getAlignment() >= 8; -}]>; - -def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>; -def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; -def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; -def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; - -// MOVNT Support -// Like 'store', but requires the non-temporal bit to be set -def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) - return ST->isNonTemporal(); - return false; -}]>; - -def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) - return ST->isNonTemporal() && !ST->isTruncatingStore() && - ST->getAddressingMode() == ISD::UNINDEXED && - ST->getAlignment() >= 16; - return false; -}]>; - -def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), - (st node:$val, node:$ptr), [{ - if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) - return ST->isNonTemporal() && - ST->getAlignment() < 16; - return false; -}]>; - -def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; -def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; -def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; -def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>; -def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; -def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; - -def vzmovl_v2i64 : PatFrag<(ops node:$src), - (bitconvert (v2i64 (X86vzmovl - (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; -def vzmovl_v4i32 : PatFrag<(ops node:$src), - (bitconvert (v4i32 (X86vzmovl - (v4i32 (scalar_to_vector (loadi32 node:$src))))))>; - -def vzload_v2i64 : PatFrag<(ops node:$src), - (bitconvert (v2i64 (X86vzload node:$src)))>; - - -def fp32imm0 : PatLeaf<(f32 fpimm), [{ - return N->isExactlyValue(+0.0); -}]>; - -// BYTE_imm - Transform bit immediates into byte immediates. -def BYTE_imm : SDNodeXForm<imm, [{ - // Transformation function: imm >> 3 - return getI32Imm(N->getZExtValue() >> 3); -}]>; - -// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*, -// SHUFP* etc. imm. -def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShuffleSHUFImmediate(N)); -}]>; - -// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to -// PSHUFHW imm. -def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShufflePSHUFHWImmediate(N)); -}]>; - -// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to -// PSHUFLW imm. -def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShufflePSHUFLWImmediate(N)); -}]>; - -// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to -// a PALIGNR imm. 
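// How these transforms are consumed (a template with an invented name,
// mirroring the pshufd fragment below): a shuffle PatFrag names an
// SDNodeXForm as its final argument, so any pattern written against the
// fragment receives the shuffle mask already converted to the imm8 that the
// instruction encodes.
def example_shuf : PatFrag<(ops node:$lhs, node:$rhs),
                           (vector_shuffle node:$lhs, node:$rhs), [{
  return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
}], SHUFFLE_get_shuf_imm>;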
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{ - return getI8Imm(X86::getShufflePALIGNRImmediate(N)); -}]>; - -def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); - return SVOp->isSplat() && SVOp->getSplatIndex() == 0; -}]>; - -def movddup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movhlps : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movlhps : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movlp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movshdup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def movsldup : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckl : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckh : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); -}]>; - -def pshufd : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_shuf_imm>; - -def shufp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_shuf_imm>; - -def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_pshufhw_imm>; - -def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_pshuflw_imm>; - -def palign : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N)); -}], SHUFFLE_get_palign_imm>; - -//===----------------------------------------------------------------------===// // SSE scalar FP Instructions //===----------------------------------------------------------------------===// @@ -368,857 +52,642 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in { } 
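// Sketch of the Is2Addr idiom the rewritten section below is built on (names
// are illustrative): a single multiclass body renders the classic two-operand
// SSE asm string when Is2Addr = 1, and the three-operand AVX/VEX string when
// instantiated with Is2Addr = 0.
multiclass example_fp_binop<bits<8> opc, string OpcodeStr, SDNode OpNode,
                            bit Is2Addr = 1> {
  def rr : SI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]>;
}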
//===----------------------------------------------------------------------===// -// SSE1 Instructions +// SSE 1 & 2 Instructions Classes //===----------------------------------------------------------------------===// -// Move Instructions. Register-to-register movss is not used for FR32 -// register copies because it's a partial register update; FsMOVAPSrr is -// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG -// because INSERT_SUBREG requires that the insert be implementable in terms of -// a copy, and just mentioned, we don't use movss for copies. -let Constraints = "$src1 = $dst" in -def MOVSSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", - [(set (v4f32 VR128:$dst), - (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>; +/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class +multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, X86MemOperand x86memop, + bit Is2Addr = 1> { + let isCommutable = 1 in { + def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>; + } + def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>; +} + +/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class +multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC, + string asm, string SSEVer, string FPSizeStr, + Operand memopr, ComplexPattern mem_cpat, + bit Is2Addr = 1> { + def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, RC:$src2))]>; + def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2), + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, mem_cpat:$src2))]>; +} + +/// sse12_fp_packed - SSE 1 & 2 packed instructions class +multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag, + Domain d, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>; + let mayLoad = 1 in + def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>; +} + +/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class +multiclass 
sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d, + string OpcodeStr, X86MemOperand x86memop, + list<dag> pat_rr, list<dag> pat_rm, + bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + pat_rr, d>; + def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + pat_rm, d>; +} + +/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class +multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC, + string asm, string SSEVer, string FPSizeStr, + X86MemOperand x86memop, PatFrag mem_frag, + Domain d, bit Is2Addr = 1> { + def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, RC:$src2))], d>; + def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2), + !if(Is2Addr, + !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, (mem_frag addr:$src2)))], d>; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Instructions +//===----------------------------------------------------------------------===// + +class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> : + SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm, + [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>; + +// Loading from memory automatically zeroing upper bits. +class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, + PatFrag mem_pat, string OpcodeStr> : + SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))]>; + +// Move Instructions. Register-to-register movss/movsd is not used for FR32/64 +// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr +// is used instead. Register-to-register movss/movsd is not modeled as an +// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable +// in terms of a copy, and, as just mentioned, we don't use movss/movsd for copies. 
+let isAsmParserOnly = 1 in { + def VMOVSSrr : sse12_move_rr<FR32, v4f32, + "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V; + def VMOVSDrr : sse12_move_rr<FR64, v2f64, + "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V; + + let canFoldAsLoad = 1, isReMaterializable = 1 in { + def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX; + + let AddedComplexity = 20 in + def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX; + } +} + +let Constraints = "$src1 = $dst" in { + def MOVSSrr : sse12_move_rr<FR32, v4f32, + "movss\t{$src2, $dst|$dst, $src2}">, XS; + def MOVSDrr : sse12_move_rr<FR64, v2f64, + "movsd\t{$src2, $dst|$dst, $src2}">, XD; +} + +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS; + let AddedComplexity = 20 in + def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD; +} + +let AddedComplexity = 15 in { // Extract the low 32-bit value from one vector and insert it into another. -let AddedComplexity = 15 in def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), (MOVSSrr (v4f32 VR128:$src1), (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; +// Extract the low 64-bit value from one vector and insert it into another. +def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; +} // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; +// Implicitly promote a 64-bit scalar to a vector. +def : Pat<(v2f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; -// Loading from memory automatically zeroing upper bits. -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (loadf32 addr:$src))]>; - +let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. -let AddedComplexity = 20 in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; +// MOVSDrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. +def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; } // Store scalar value to memory. 
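// A plain FR32/FR64 store selects directly to MOVSSmr/MOVSDmr below; storing
// lane 0 of a 128-bit vector is handled by the "Extract and store" patterns
// that follow them. For example, IR of the form
//   %e = extractelement <2 x double> %v, i32 0
//   store double %e, double* %p
// emits a single movsd to memory: the lane-0 extract is folded into the store
// as an EXTRACT_SUBREG of sub_sd rather than a separate instruction.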
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>; +def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store FR64:$src, addr:$dst)]>; + +let isAsmParserOnly = 1 in { +def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), + "movss\t{$src, $dst|$dst, $src}", + [(store FR32:$src, addr:$dst)]>, XS, VEX_4V; +def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store FR64:$src, addr:$dst)]>, XD, VEX_4V; +} // Extract and store. def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (MOVSSmr addr:$dst, (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; -// Conversion instructions -def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (fp_to_sint FR32:$src))]>; -def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>; -def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), - "cvtsi2ss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp GR32:$src))]>; -def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), - "cvtsi2ss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>; - -// Match intrinsics which expect XMM operand(s). -def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), - "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>; -def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>; - -def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvtss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>; -def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvtss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse_cvtss2si - (load addr:$src)))]>; - -// Match intrinsics which expect MM and XMM operand(s). 
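// Background note: cvtps2pi converts only the two low packed floats and uses
// the current MXCSR rounding mode, which a generic fp_to_sint pattern cannot
// express, so these forms are matched from their target intrinsics instead.
// Shape of the mapping (invented name, mirroring Int_CVTPS2PIrr below):
def EXAMPLE_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
                             "cvtps2pi\t{$src, $dst|$dst, $src}",
                             [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;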
-def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>; -def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtps2pi - (load addr:$src)))]>; -def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>; -def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttps2pi - (load addr:$src)))]>; -let Constraints = "$src1 = $dst" in { - def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR64:$src2), - "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, - VR64:$src2))]>; - def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), - "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, - (load addr:$src2)))]>; -} - -// Aliases for intrinsics -def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, - (int_x86_sse_cvttss2si VR128:$src))]>; -def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, - (int_x86_sse_cvttss2si(load addr:$src)))]>; - -let Constraints = "$src1 = $dst" in { - def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR32:$src2), - "cvtsi2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1, - GR32:$src2))]>; - def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2), - "cvtsi2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1, - (loadi32 addr:$src2)))]>; -} - -// Comparison instructions -let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in - def CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; - - // Accept explicit immediate argument form instead of comparison code. 
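// Note on the two spellings: the primary defs encode the condition in the
// SSECC operand and print condition-specific mnemonics, so cc = 0 prints as
// "cmpeqss %xmm1, %xmm0"; the _alt defs below take a raw i8imm so the
// assembler also accepts the explicit form "cmpss $0, %xmm1, %xmm0". They are
// isAsmParserOnly, carry no patterns, and are never produced by isel.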
-let isAsmParserOnly = 1 in { - def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -let mayLoad = 1 in - def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -} -} - -let Defs = [EFLAGS] in { -def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>; -def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; - -def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", []>; -def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", []>; - -} // Defs = [EFLAGS] - -// Aliases to match intrinsics which expect XMM operand(s). -let Constraints = "$src1 = $dst" in { - def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss - VR128:$src1, - VR128:$src, imm:$cc))]>; - def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, - (load addr:$src), imm:$cc))]>; -} - -let Defs = [EFLAGS] in { -def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), - VR128:$src2))]>; -def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), - (load addr:$src2)))]>; - -def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v4f32 VR128:$src1), - VR128:$src2))]>; -def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v4f32 VR128:$src1), - (load addr:$src2)))]>; -} // Defs = [EFLAGS] - -// Aliases of packed SSE1 instructions for scalar use. These all have names -// that start with 'Fs'. - -// Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in - // FIXME: Set encoding to pseudo! -def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; - -// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are -// disregarded. +// Move Aligned/Unaligned floating point values +multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC, + X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d, + bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in -def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; - -// Alias instruction to load FR32 from f128mem using movaps. Upper bits are -// disregarded. 
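// Why movaps for a scalar copy (sketch with an invented name, mirroring the
// FsMOVAPSrr being removed above): movaps rewrites all 128 bits, so the copy
// has no input dependence on $dst, avoiding the partial-register-update stall
// that movss would incur; the FR32 value simply lives in the low lane and the
// upper bits are disregarded.
def EXAMPLE_FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
                             "movaps\t{$src, $dst|$dst, $src}", []>;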
-let canFoldAsLoad = 1, isReMaterializable = 1 in -def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), - "movaps\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; - -// Alias bitwise logical operations using SSE logical ops on packed FP values. -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { - def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst), - (ins FR32:$src1, FR32:$src2), - "andps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>; - def FsORPSrr : PSI<0x56, MRMSrcReg, (outs FR32:$dst), - (ins FR32:$src1, FR32:$src2), - "orps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>; - def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst), - (ins FR32:$src1, FR32:$src2), - "xorps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>; -} - -def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f128mem:$src2), - "andps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fand FR32:$src1, - (memopfsf32 addr:$src2)))]>; -def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f128mem:$src2), - "orps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86for FR32:$src1, - (memopfsf32 addr:$src2)))]>; -def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f128mem:$src2), - "xorps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fxor FR32:$src1, - (memopfsf32 addr:$src2)))]>; - -let neverHasSideEffects = 1 in { -def FsANDNPSrr : PSI<0x55, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - "andnps\t{$src2, $dst|$dst, $src2}", []>; -let mayLoad = 1 in -def FsANDNPSrm : PSI<0x55, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2), - "andnps\t{$src2, $dst|$dst, $src2}", []>; -} -} - -/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a scalar) -/// and leaves the top elements unmodified (therefore these cannot be commuted). -/// -/// These three forms can each be reg+reg or reg+mem, so there are a total of -/// six "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, Intrinsic F32Int, - bit Commutable = 0> { - // Scalar operation, reg+reg. - def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f32mem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>; - - // Vector operation, reg+reg. - def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. 
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>; - - // Intrinsic operation, reg+reg. - def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>; - - // Intrinsic operation, reg+mem. - def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F32Int VR128:$src1, - sse_load_f32:$src2))]>; + def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>; +let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in + def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (ld_frag addr:$src))], d>; } -} - -// Arithmetic instructions -defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>; -defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>; -defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>; -defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>; -/// sse1_fp_binop_rm - Other SSE1 binops -/// -/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of -/// instructions for a full-vector intrinsic form. Operations that map -/// onto C operators don't use this form since they just use the plain -/// vector form instead of having a separate vector intrinsic form. -/// -/// This provides a total of eight "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F32Int, - Intrinsic V4F32Int, - bit Commutable = 0> { - - // Scalar operation, reg+reg. - def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f32mem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>; - - // Vector operation, reg+reg. - def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. - def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>; - - // Intrinsic operation, reg+reg. - def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } - - // Intrinsic operation, reg+mem. 
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2), - !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F32Int VR128:$src1, - sse_load_f32:$src2))]>; - - // Vector intrinsic operation, reg+reg. - def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } - - // Vector intrinsic operation, reg+mem. - def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (V4F32Int VR128:$src1, (memopv4f32 addr:$src2)))]>; -} +let isAsmParserOnly = 1 in { +defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, + "movaps", SSEPackedSingle>, VEX; +defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, + "movapd", SSEPackedDouble>, OpSize, VEX; +defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, + "movups", SSEPackedSingle>, VEX; +defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, + "movupd", SSEPackedDouble, 0>, OpSize, VEX; + +defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, + "movaps", SSEPackedSingle>, VEX; +defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, + "movapd", SSEPackedDouble>, OpSize, VEX; +defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, + "movups", SSEPackedSingle>, VEX; +defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, + "movupd", SSEPackedDouble, 0>, OpSize, VEX; } +defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, + "movaps", SSEPackedSingle>, TB; +defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, + "movapd", SSEPackedDouble>, TB, OpSize; +defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, + "movups", SSEPackedSingle>, TB; +defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, + "movupd", SSEPackedDouble, 0>, TB, OpSize; -defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax, - int_x86_sse_max_ss, int_x86_sse_max_ps>; -defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin, - int_x86_sse_min_ss, int_x86_sse_min_ps>; - -//===----------------------------------------------------------------------===// -// SSE packed FP Instructions - -// Move Instructions -let neverHasSideEffects = 1 in -def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), +let isAsmParserOnly = 1 in { +def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>; - + [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX; +def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX; +def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movups\t{$src, $dst|$dst, $src}", + [(store (v4f32 VR128:$src), addr:$dst)]>, VEX; +def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v2f64 VR128:$src), addr:$dst)]>, VEX; +def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, 
VR256:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX; +def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX; +def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movups\t{$src, $dst|$dst, $src}", + [(store (v8f32 VR256:$src), addr:$dst)]>, VEX; +def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; +} def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)]>; - -let neverHasSideEffects = 1 in -def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movups\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (loadv4f32 addr:$src))]>; +def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v2f64 VR128:$src), addr:$dst)]>; def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", [(store (v4f32 VR128:$src), addr:$dst)]>; +def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v2f64 VR128:$src), addr:$dst)]>; -// Intrinsic forms of MOVUPS load and store +// Intrinsic forms of MOVUPS/D load and store +let isAsmParserOnly = 1 in { + let canFoldAsLoad = 1, isReMaterializable = 1 in + def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "movups\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX; + def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX; + def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movups\t{$src, $dst|$dst, $src}", + [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX; + def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX; +} let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; +def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>; + def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>; +def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>; -let Constraints = "$src1 = $dst" in { - let AddedComplexity = 20 in { - def MOVLPSrm : PSI<0x12, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "movlps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (movlp VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 
addr:$src2))))))]>;
-    def MOVHPSrm : PSI<0x16, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movhps\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (movlhps VR128:$src1,
-                    (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
+// Move Low/High packed floating point values
+multiclass sse12_mov_hilo_packed<bits<8> opc, RegisterClass RC,
+                                 PatFrag mov_frag, string base_opc,
+                                 string asm_opr> {
+  def PSrm : PI<opc, MRMSrcMem,
+         (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+         !strconcat(!strconcat(base_opc,"s"), asm_opr),
+     [(set RC:$dst,
+       (mov_frag RC:$src1,
+              (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
+              SSEPackedSingle>, TB;
+
+  def PDrm : PI<opc, MRMSrcMem,
+         (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+         !strconcat(!strconcat(base_opc,"d"), asm_opr),
+     [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+                            (scalar_to_vector (loadf64 addr:$src2)))))],
+              SSEPackedDouble>, TB, OpSize;
+}

-def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
-          (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+  defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+  defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+                     "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+  defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+                     "\t{$src2, $dst|$dst, $src2}">;
+  defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+                     "\t{$src2, $dst|$dst, $src2}">;
+}
+let isAsmParserOnly = 1 in {
+def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movlps\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+                                 (iPTR 0))), addr:$dst)]>, VEX;
+def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movlpd\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract (v2f64 VR128:$src),
+                                 (iPTR 0))), addr:$dst)]>, VEX;
+}
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)]>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+                   "movlpd\t{$src, $dst|$dst, $src}",
+                   [(store (f64 (vector_extract (v2f64 VR128:$src),
+                                 (iPTR 0))), addr:$dst)]>;

// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
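// (Editorial sketch, not from this patch: that custom lowering rewrites
//   (f64 (vector_extract (v2f64 V), 1))
// into
//   (f64 (vector_extract (v2f64 (unpckh V, (undef))), 0))
// which is exactly the dag the MOVHPSmr/MOVHPDmr store patterns below match.)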
+let isAsmParserOnly = 1 in { +def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (unpckh (bc_v2f64 (v4f32 VR128:$src)), + (undef)), (iPTR 0))), addr:$dst)]>, + VEX; +def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (unpckh VR128:$src, (undef))), + (iPTR 0))), addr:$dst)]>, + VEX; +} def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (unpckh (bc_v2f64 (v4f32 VR128:$src)), (undef)), (iPTR 0))), addr:$dst)]>; +def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (unpckh VR128:$src, (undef))), + (iPTR 0))), addr:$dst)]>; -let Constraints = "$src1 = $dst" in { -let AddedComplexity = 20 in { -def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - "movlhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; - -def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - "movhlps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; -} // AddedComplexity -} // Constraints = "$src1 = $dst" +let isAsmParserOnly = 1, AddedComplexity = 20 in { + def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>, + VEX_4V; + def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>, + VEX_4V; +} +let Constraints = "$src1 = $dst", AddedComplexity = 20 in { + def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movlhps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; + def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movhlps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; +} +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; let AddedComplexity = 20 in { -def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; -def : Pat<(v2i64 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; + def : Pat<(v4f32 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; + def : Pat<(v2i64 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; } +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Conversion Instructions +//===----------------------------------------------------------------------===// +multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (OpNode SrcRC:$src))]>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set 
DstRC:$dst, (OpNode (ld_frag addr:$src)))]>; +} -// Arithmetic - -/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a -/// scalar) and leaves the top elements undefined. -/// -/// And, we have a special variant form for a full-vector intrinsic form. -/// -/// These four forms can each have a reg or a mem operand, so there are a -/// total of eight "instructions". -/// -multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F32Int, - Intrinsic V4F32Int, - bit Commutable = 0> { - // Scalar operation, reg. - def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode FR32:$src))]> { - let isCommutable = Commutable; - } - - // Scalar operation, mem. - def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS, - Requires<[HasSSE1, OptForSize]>; +multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { + def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (OpNode SrcRC:$src))], d>; + def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>; +} - // Vector operation, reg. - def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> { - let isCommutable = Commutable; - } +multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), + asm, []>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), + (ins DstRC:$src1, x86memop:$src), asm, []>; +} - // Vector operation, mem. - def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>; +let isAsmParserOnly = 1 in { +defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, + "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX; +defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; +defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, + "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS, + VEX_4V; +defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, + "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD, + VEX_4V; +} - // Intrinsic operation, reg. 
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int VR128:$src))]> { - let isCommutable = Commutable; - } +defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, + "cvttss2si\t{$src, $dst|$dst, $src}">, XS; +defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD; +defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, + "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS; +defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, + "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD; + +// Conversion Instructions Intrinsics - Match intrinsics which expect MM +// and/or XMM operand(s). +multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { + def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (Int SrcRC:$src))], d>; + def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>; +} - // Intrinsic operation, mem. - def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), - !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F32Int sse_load_f32:$src))]>; +multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, + Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, + [(set DstRC:$dst, (Int SrcRC:$src))]>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, + [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>; +} - // Vector intrinsic operation, reg - def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (V4F32Int VR128:$src))]> { - let isCommutable = Commutable; - } +multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, + PatFrag ld_frag, string asm, Domain d> { + def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), + asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>; + def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), + (ins DstRC:$src1, x86memop:$src2), asm, + [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>; +} - // Vector intrinsic operation, mem - def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>; +multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), + asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>; + def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), + (ins DstRC:$src1, x86memop:$src2), asm, + [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>; } -// Square root. 
-defm SQRT  : sse1_fp_unop_rm<0x51, "sqrt",  fsqrt,
-                             int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
+let isAsmParserOnly = 1 in {
+  defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+                        f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
+                        VEX;
+  defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+                        f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
+                        VEX;
+}
+defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+                      f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+                      f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;

-// Reciprocal approximations. Note that these typically require refinement
-// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
-                             int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
-defm RCP   : sse1_fp_unop_rm<0x53, "rcp",   X86frcp,
-                             int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;

-// Logical
 let Constraints = "$src1 = $dst" in {
-  let isCommutable = 1 in {
-    def ANDPSrr : PSI<0x54, MRMSrcReg,
-                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "andps\t{$src2, $dst|$dst, $src2}",
-                      [(set VR128:$dst, (v2i64
-                                         (and VR128:$src1, VR128:$src2)))]>;
-    def ORPSrr  : PSI<0x56, MRMSrcReg,
-                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "orps\t{$src2, $dst|$dst, $src2}",
-                      [(set VR128:$dst, (v2i64
-                                         (or VR128:$src1, VR128:$src2)))]>;
-    def XORPSrr : PSI<0x57, MRMSrcReg,
-                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                      "xorps\t{$src2, $dst|$dst, $src2}",
-                      [(set VR128:$dst, (v2i64
-                                         (xor VR128:$src1, VR128:$src2)))]>;
-  }
-
-  def ANDPSrm : PSI<0x54, MRMSrcMem,
-                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "andps\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (and (bc_v2i64 (v4f32 VR128:$src1)),
-                                       (memopv2i64 addr:$src2)))]>;
-  def ORPSrm  : PSI<0x56, MRMSrcMem,
-                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "orps\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (or (bc_v2i64 (v4f32 VR128:$src1)),
-                                      (memopv2i64 addr:$src2)))]>;
-  def XORPSrm : PSI<0x57, MRMSrcMem,
-                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                    "xorps\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (xor (bc_v2i64 (v4f32 VR128:$src1)),
-                                       (memopv2i64 addr:$src2)))]>;
-  def ANDNPSrr : PSI<0x55, MRMSrcReg,
-                     (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                     "andnps\t{$src2, $dst|$dst, $src2}",
-                     [(set VR128:$dst,
-                       (v2i64 (and (xor VR128:$src1,
-                                    (bc_v2i64 (v4i32 immAllOnesV))),
-                               VR128:$src2)))]>;
-  def ANDNPSrm : PSI<0x55, MRMSrcMem,
-                     (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
-                     "andnps\t{$src2, $dst|$dst, $src2}",
-                     [(set VR128:$dst,
-                       (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
-                                    (bc_v2i64 (v4i32 immAllOnesV))),
-                               (memopv2i64 addr:$src2))))]>;
+  defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                        int_x86_sse_cvtsi2ss, i32mem, loadi32,
+                        "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+  defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+                        int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+                        "cvtsi2sd\t{$src2, $dst|$dst, $src2}">, XD;
 }
+// Instructions below don't have an AVX form.
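+// (Editorial note: the conversions below use the MMX VR64 register class;
+// AVX defines no VEX-encoded forms of the MMX <-> XMM conversions, so only
+// the legacy encodings exist.)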
+defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, + f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, + f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, + f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, + f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, + i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; let Constraints = "$src1 = $dst" in { - def CMPPSrri : PSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, - VR128:$src, imm:$cc))]>; - def CMPPSrmi : PSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), - "cmp${cc}ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, - (memop addr:$src), imm:$cc))]>; - - // Accept explicit immediate argument form instead of comparison code. -let isAsmParserOnly = 1 in { - def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), - "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>; - def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), - "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>; + defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, + int_x86_sse_cvtpi2ps, + i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; } -} -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; - -// Shuffle and unpack instructions -let Constraints = "$src1 = $dst" in { - let isConvertibleToThreeAddress = 1 in // Convert to pshufd - def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, - VR128:$src2, i8imm:$src3), - "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; - def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, - f128mem:$src2, i8imm:$src3), - "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v4f32 (shufp:$src3 - VR128:$src1, (memopv4f32 addr:$src2))))]>; - - let AddedComplexity = 10 in { - def UNPCKHPSrr : PSI<0x15, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpckhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPSrm : PSI<0x15, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "unpckhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, - (memopv4f32 addr:$src2))))]>; - - def UNPCKLPSrr : PSI<0x14, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpcklps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPSrm : PSI<0x14, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), 
-                     "unpcklps\t{$src2, $dst|$dst, $src2}",
-                     [(set VR128:$dst,
-                       (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-// Mask creation
-def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "movmskps\t{$src, $dst|$dst, $src}",
-                     [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
-def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "movmskpd\t{$src, $dst|$dst, $src}",
-                     [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
-
-// Prefetch intrinsic.
-def PREFETCHT0   : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
-    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
-def PREFETCHT1   : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
-    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
-def PREFETCHT2   : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
-    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
-def PREFETCHNTA  : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
-    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
-// Non-temporal stores
-def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                    "movntps\t{$src, $dst|$dst, $src}",
-                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+/// SSE 1 Only

-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntps\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-
-def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntdq\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
-
-def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                 "movnti\t{$src, $dst|$dst, $src}",
-                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
-               TB, Requires<[HasSSE2]>;
-
-def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
-                     "movnti\t{$src, $dst|$dst, $src}",
-                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
-                  TB, Requires<[HasSSE2]>;
+// Aliases for intrinsics
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+                                          int_x86_sse_cvttss2si, f32mem, load,
+                                          "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
+defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+                                          int_x86_sse2_cvttsd2si, f128mem, load,
+                                          "cvttsd2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
}
-
-// Load, store, and memory fence
-def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
-             TB, Requires<[HasSSE1]>;
-
-// MXCSR register
-def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
-                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
-def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
-                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
-
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
-    isCodeGenOnly = 1 in {
-def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
-def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
-let ExeDomain = SSEPackedInt in
-def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
-                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+                                    f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
+                                    XS;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+                                    f128mem, load, "cvttsd2si\t{$src, $dst|$dst, $src}">,
+                                    XD;
+
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+                          "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
+                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                            SSEPackedSingle>, TB, VEX;
}
-
-def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
-
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-
-//===---------------------------------------------------------------------===//
-// SSE2 Instructions
-//===---------------------------------------------------------------------===//
-
-// Move Instructions. Register-to-register movsd is not used for FR64
-// register copies because it's a partial register update; FsMOVAPDrr is
-// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movsd for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSDrr : SDI<0x10, MRMSrcReg,
-                  (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
-                  "movsd\t{$src2, $dst|$dst, $src2}",
-                  [(set (v2f64 VR128:$dst),
-                        (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
-
-// Extract the low 64-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
-def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
-          (MOVSDrr (v2f64 VR128:$src1),
-                   (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
-          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
-def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
-                  "movsd\t{$src, $dst|$dst, $src}",
-                  [(set FR64:$dst, (loadf64 addr:$src))]>;
-
-// MOVSDrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzload addr:$src)),
-          (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+let Pattern = []<dag> in {
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
+                          "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+                            "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                            SSEPackedSingle>, TB; /* PD SSE3 form is available */
}
-// Store scalar value to memory.
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
-                  "movsd\t{$src, $dst|$dst, $src}",
-                  [(store FR64:$src, addr:$dst)]>;
-
-// Extract and store.
-def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
-                 addr:$dst),
-          (MOVSDmr addr:$dst,
-                   (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+/// SSE 2 Only

-// Conversion instructions
-def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
-                      "cvttsd2si\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
-def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
-                      "cvttsd2si\t{$src, $dst|$dst, $src}",
-                      [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+// Convert scalar double to scalar single
+let isAsmParserOnly = 1 in {
+def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+                       (ins FR64:$src1, FR64:$src2),
+                       "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                       VEX_4V;
+def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+                     (ins FR64:$src1, f64mem:$src2),
+                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                     []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
+}
def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))]>;
@@ -1226,35 +695,28 @@ def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
                  Requires<[HasSSE2, OptForSize]>;
-def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
-                     "cvtsi2sd\t{$src, $dst|$dst, $src}",
-                     [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
-                     "cvtsi2sd\t{$src, $dst|$dst, $src}",
-                     [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
-def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                     "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                     "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
-                  "comisd\t{$src2, $src1|$src1, $src2}", []>;
-def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
-                  "comisd\t{$src2, $src1|$src1, $src2}", []>;
-
-// SSE2 instructions with XS prefix
+let isAsmParserOnly = 1 in
+defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+                      int_x86_sse2_cvtsd2ss, f64mem, load,
+                      "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+                      XD, VEX_4V;
+let Constraints = "$src1 = $dst" in
+defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+                      int_x86_sse2_cvtsd2ss, f64mem, load,
+                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XD;
+
+// Convert scalar single to scalar double
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
+                    (ins FR32:$src1, FR32:$src2),
+                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    []>, XS, Requires<[HasAVX]>, VEX_4V;
+def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
+                    (ins FR32:$src1, f32mem:$src2),
+                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
+}
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))]>, XS,
                 Requires<[HasSSE2, OptForSize]>;
@@ -1264,394 +726,51 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                   [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
                 Requires<[HasSSE2, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>,
-      Requires<[HasSSE2, OptForSpeed]>;
-
-// Match intrinsics which expect XMM operand(s).
-def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                         "cvtsd2si\t{$src, $dst|$dst, $src}",
-                         [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
-def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
-                         "cvtsd2si\t{$src, $dst|$dst, $src}",
-                         [(set GR32:$dst, (int_x86_sse2_cvtsd2si
-                                           (load addr:$src)))]>;
-
-// Match intrinsics which expect MM and XMM operand(s).
-def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>; -def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtpd2pi - (memop addr:$src)))]>; -def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>; -def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttpd2pi - (memop addr:$src)))]>; -def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>; -def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cvtpi2pd - (load addr:$src)))]>; - -// Aliases for intrinsics -def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvttsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, - (int_x86_sse2_cvttsd2si VR128:$src))]>; -def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), - "cvttsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_cvttsd2si - (load addr:$src)))]>; - -// Comparison instructions -let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in - def CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; - - // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1 in { - def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -let mayLoad = 1 in - def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -} -} - -let Defs = [EFLAGS] in { -def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>; -def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>; -} // Defs = [EFLAGS] - -// Aliases to match intrinsics which expect XMM operand(s). 
-let Constraints = "$src1 = $dst" in { - def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, - VR128:$src, imm:$cc))]>; - def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, f64mem:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, - (load addr:$src), imm:$cc))]>; -} - -let Defs = [EFLAGS] in { -def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), - VR128:$src2))]>; -def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), - (load addr:$src2)))]>; - -def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v2f64 VR128:$src1), - VR128:$src2))]>; -def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v2f64 VR128:$src1), - (load addr:$src2)))]>; -} // Defs = [EFLAGS] - -// Aliases of packed SSE2 instructions for scalar use. These all have names -// that start with 'Fs'. - -// Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in -def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasSSE2]>, TB, OpSize; - -// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are -// disregarded. -let neverHasSideEffects = 1 in -def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; - -// Alias instruction to load FR64 from f128mem using movapd. Upper bits are -// disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1 in -def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; - -// Alias bitwise logical operations using SSE logical ops on packed FP values. 
-let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { - def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst), - (ins FR64:$src1, FR64:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>; - def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst), - (ins FR64:$src1, FR64:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>; - def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst), - (ins FR64:$src1, FR64:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>; -} - -def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f128mem:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fand FR64:$src1, - (memopfsf64 addr:$src2)))]>; -def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f128mem:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86for FR64:$src1, - (memopfsf64 addr:$src2)))]>; -def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f128mem:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fxor FR64:$src1, - (memopfsf64 addr:$src2)))]>; - -let neverHasSideEffects = 1 in { -def FsANDNPDrr : PDI<0x55, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", []>; -let mayLoad = 1 in -def FsANDNPDrm : PDI<0x55, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", []>; -} -} - -/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a scalar) -/// and leaves the top elements unmodified (therefore these cannot be commuted). -/// -/// These three forms can each be reg+reg or reg+mem, so there are a total of -/// six "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, Intrinsic F64Int, - bit Commutable = 0> { - // Scalar operation, reg+reg. - def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f64mem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>; - - // Vector operation, reg+reg. - def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. - def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>; - - // Intrinsic operation, reg+reg. - def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>; - - // Intrinsic operation, reg+mem. 
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F64Int VR128:$src1, - sse_load_f64:$src2))]>; +def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, + VR128:$src2))]>, XS, VEX_4V, + Requires<[HasAVX]>; +def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, + (load addr:$src2)))]>, XS, VEX_4V, + Requires<[HasAVX]>; } +let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix +def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "cvtss2sd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, + VR128:$src2))]>, XS, + Requires<[HasSSE2]>; +def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), + "cvtss2sd\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, + (load addr:$src2)))]>, XS, + Requires<[HasSSE2]>; } -// Arithmetic instructions -defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>; -defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>; -defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>; -defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>; - -/// sse2_fp_binop_rm - Other SSE2 binops -/// -/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of -/// instructions for a full-vector intrinsic form. Operations that map -/// onto C operators don't use this form since they just use the plain -/// vector form instead of having a separate vector intrinsic form. -/// -/// This provides a total of eight "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F64Int, - Intrinsic V2F64Int, - bit Commutable = 0> { - - // Scalar operation, reg+reg. - def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f64mem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>; - - // Vector operation, reg+reg. - def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. - def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>; - - // Intrinsic operation, reg+reg. 
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } - - // Intrinsic operation, reg+mem. - def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2), - !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (F64Int VR128:$src1, - sse_load_f64:$src2))]>; - - // Vector intrinsic operation, reg+reg. - def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } +def : Pat<(extloadf32 addr:$src), + (CVTSS2SDrr (MOVSSrm addr:$src))>, + Requires<[HasSSE2, OptForSpeed]>; - // Vector intrinsic operation, reg+mem. - def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (V2F64Int VR128:$src1, - (memopv2f64 addr:$src2)))]>; -} +// Convert doubleword to packed single/double fp +let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix +def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, + TB, VEX, Requires<[HasAVX]>; +def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps + (bitconvert (memopv2i64 addr:$src))))]>, + TB, VEX, Requires<[HasAVX]>; } - -defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax, - int_x86_sse2_max_sd, int_x86_sse2_max_pd>; -defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin, - int_x86_sse2_min_sd, int_x86_sse2_min_pd>; - -//===---------------------------------------------------------------------===// -// SSE packed FP Instructions - -// Move Instructions -let neverHasSideEffects = 1 in -def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>; - -def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(alignedstore (v2f64 VR128:$src), addr:$dst)]>; - -let neverHasSideEffects = 1 in -def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1 in -def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (loadv2f64 addr:$src))]>; -def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(store (v2f64 VR128:$src), addr:$dst)]>; - -// Intrinsic forms of MOVUPD load and store -def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>; -def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>; - -let Constraints = "$src1 = $dst" in { - let AddedComplexity = 20 
in {
-    def MOVLPDrm : PDI<0x12, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movlpd\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (v2f64 (movlp VR128:$src1,
-                                 (scalar_to_vector (loadf64 addr:$src2)))))]>;
-    def MOVHPDrm : PDI<0x16, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
-                       "movhpd\t{$src2, $dst|$dst, $src2}",
-                       [(set VR128:$dst,
-                         (v2f64 (movlhps VR128:$src1,
-                                 (scalar_to_vector (loadf64 addr:$src2)))))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movlpd\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (vector_extract (v2f64 VR128:$src),
-                                 (iPTR 0))), addr:$dst)]>;
-
-// v2f64 extract element 1 is always custom lowered to unpack high to low
-// and extract element 0 so the non-store version isn't too horrible.
-def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
-                   "movhpd\t{$src, $dst|$dst, $src}",
-                   [(store (f64 (vector_extract
-                                 (v2f64 (unpckh VR128:$src, (undef))),
-                                 (iPTR 0))), addr:$dst)]>;
-
-// SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2ps\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -1662,7 +781,18 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                       (bitconvert (memopv2i64 addr:$src))))]>,
                       TB, Requires<[HasSSE2]>;
-// SSE2 instructions with XS prefix
+// FIXME: why is the non-intrinsic version described as SSE3?
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+                     XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+                                           (bitconvert (memopv2i64 addr:$src))))]>,
+                     XS, VEX, Requires<[HasAVX]>;
+}
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -1673,6 +803,29 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       (bitconvert (memopv2i64 addr:$src))))]>,
                     XS, Requires<[HasSSE2]>;
+// Convert packed single/double fp to doubleword
+let isAsmParserOnly = 1 in {
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                     "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "cvtps2dq\t{$src, $dst|$dst, $src}",
+                        [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+                        VEX;
+def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
+                         (ins f128mem:$src),
+                         "cvtps2dq\t{$src, $dst|$dst, $src}",
+                         [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+                                            (memop addr:$src)))]>, VEX;
+}
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "cvtps2dq\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
(int_x86_sse2_cvtps2dq VR128:$src))]>; @@ -1680,12 +833,54 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memop addr:$src)))]>; -// SSE2 packed instructions with XS prefix + +let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix +def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + XD, VEX, Requires<[HasAVX]>; +def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq + (memop addr:$src)))]>, + XD, VEX, Requires<[HasAVX]>; +} +def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + XD, Requires<[HasSSE2]>; +def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq + (memop addr:$src)))]>, + XD, Requires<[HasSSE2]>; + + +// Convert with truncation packed single/double fp to doubleword +let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix +def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +} def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>; def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>; + +let isAsmParserOnly = 1 in { +def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))]>, + XS, VEX, Requires<[HasAVX]>; +def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttps2dq + (memop addr:$src)))]>, + XS, VEX, Requires<[HasAVX]>; +} def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -1697,17 +892,18 @@ def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memop addr:$src)))]>, XS, Requires<[HasSSE2]>; -// SSE2 packed instructions with XD prefix -def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, - XD, Requires<[HasSSE2]>; -def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))]>, - XD, Requires<[HasSSE2]>; - +let isAsmParserOnly = 1 in { +def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>, + VEX; +def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memop addr:$src)))]>, VEX; +} def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs 
VR128:$dst), (ins VR128:$src),
                      "cvttpd2dq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -1716,12 +912,31 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                      [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
                                         (memop addr:$src)))]>;

-// SSE2 instructions without OpSize prefix
+// Convert packed single to packed double
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+                    Requires<[HasAVX]>;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+                    "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+                    Requires<[HasAVX]>;
+}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                   "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;

+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "cvtps2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+                     VEX, Requires<[HasAVX]>;
+def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+                       "cvtps2pd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+                                          (load addr:$src)))]>,
+                     VEX, Requires<[HasAVX]>;
+}
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtps2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1732,12 +947,29 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                                          (load addr:$src)))]>,
                       TB, Requires<[HasSSE2]>;

+// Convert packed double to packed single
+let isAsmParserOnly = 1 in {
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+// FIXME: the memory form of this instruction should be described using
+// extra asm syntax
+}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                     "cvtpd2ps\t{$src, $dst|$dst, $src}", []>;

+let isAsmParserOnly = 1 in {
+def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
+                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
+                         (ins f128mem:$src),
+                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
+                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+                                            (memop addr:$src)))]>;
+}
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "cvtpd2ps\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1746,269 +978,1039 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
                         [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
                                            (memop addr:$src)))]>;

-// Match intrinsics which expect XMM operand(s).
-// Aliases for intrinsics +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Compare Instructions +//===----------------------------------------------------------------------===// + +// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions +multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, + string asm, string asm_alt> { + def rr : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), + asm, []>; + let mayLoad = 1 in + def rm : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc), + asm, []>; + // Accept explicit immediate argument form instead of comparison code. + let isAsmParserOnly = 1 in { + def rr_alt : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, []>; + let mayLoad = 1 in + def rm_alt : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2), + asm_alt, []>; + } +} + +let neverHasSideEffects = 1, isAsmParserOnly = 1 in { + defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, + "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">, + XS, VEX_4V; + defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, + "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">, + XD, VEX_4V; +} + +let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { + defm CMPSS : sse12_cmp_scalar<FR32, f32mem, + "cmp${cc}ss\t{$src, $dst|$dst, $src}", + "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS; + defm CMPSD : sse12_cmp_scalar<FR64, f64mem, + "cmp${cc}sd\t{$src, $dst|$dst, $src}", + "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD; +} + +multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop, + Intrinsic Int, string asm> { + def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src, SSECC:$cc), asm, + [(set VR128:$dst, (Int VR128:$src1, + VR128:$src, imm:$cc))]>; + def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm, + [(set VR128:$dst, (Int VR128:$src1, + (load addr:$src), imm:$cc))]>; +} + +// Aliases to match intrinsics which expect XMM operand(s). 
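For reference, SSECC above is the 3-bit comparison-code operand baked into the cmp${cc}ss / cmp${cc}sd mnemonics (eq=0, lt=1, le=2, unord=3, neq=4, nlt=5, nle=6, ord=7), and the rr_alt/rm_alt defs let the assembler also accept the raw-immediate spelling. A minimal C++ sketch of the scalar-compare semantics, assuming only the standard SSE1 intrinsics header:

#include <xmmintrin.h>

// cmpltss and "cmpss $1" name the same instruction: the rr/rm defs bake the
// predicate into the mnemonic, the rr_alt/rm_alt defs accept the explicit
// imm8. Element 0 of the result is an all-ones/all-zeros mask; elements 1-3
// pass through from the first operand, so the operands cannot be swapped.
__m128 less_scalar(__m128 a, __m128 b) {
  return _mm_cmplt_ss(a, b);   // cmpss with imm8 = 1 (LT)
}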
+let isAsmParserOnly = 1 in { + defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, + "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">, + XS, VEX_4V; + defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, + "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">, + XD, VEX_4V; +} let Constraints = "$src1 = $dst" in { -def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR32:$src2), - "cvtsi2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1, - GR32:$src2))]>; -def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2), - "cvtsi2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1, - (loadi32 addr:$src2)))]>; -def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "cvtsd2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, - VR128:$src2))]>; -def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "cvtsd2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, - (load addr:$src2)))]>; -def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))]>, XS, - Requires<[HasSSE2]>; -def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), - "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))]>, XS, - Requires<[HasSSE2]>; + defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss, + "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS; + defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd, + "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD; +} + + +// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS +multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, + ValueType vt, X86MemOperand x86memop, + PatFrag ld_frag, string OpcodeStr, Domain d> { + def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>; + def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), + [(set EFLAGS, (OpNode (vt RC:$src1), + (ld_frag addr:$src2)))], d>; +} + +let Defs = [EFLAGS] in { + let isAsmParserOnly = 1 in { + defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, VEX; + defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, OpSize, VEX; + let Pattern = []<dag> in { + defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, VEX; + defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, OpSize, VEX; + } + + defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, VEX; + defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, OpSize, VEX; + + defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, + load, "comiss", SSEPackedSingle>, VEX; + defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, + load, 
"comisd", SSEPackedDouble>, OpSize, VEX; + } + defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, TB; + defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, TB, OpSize; + + let Pattern = []<dag> in { + defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, TB; + defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, TB, OpSize; + } + + defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, TB; + defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, TB, OpSize; + + defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, TB; + defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, TB, OpSize; +} // Defs = [EFLAGS] + +// sse12_cmp_packed - sse 1 & 2 compared packed instructions +multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop, + Intrinsic Int, string asm, string asm_alt, + Domain d> { + def rri : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; + def rmi : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; + // Accept explicit immediate argument form instead of comparison code. + let isAsmParserOnly = 1 in { + def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, [], d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), + asm_alt, [], d>; + } +} + +let isAsmParserOnly = 1 in { + defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, + "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedSingle>, VEX_4V; + defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, + "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}", + "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; +} +let Constraints = "$src1 = $dst" in { + defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps, + "cmp${cc}ps\t{$src, $dst|$dst, $src}", + "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", + SSEPackedSingle>, TB; + defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd, + "cmp${cc}pd\t{$src, $dst|$dst, $src}", + "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", + SSEPackedDouble>, TB, OpSize; +} + +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Shuffle Instructions +//===----------------------------------------------------------------------===// + +/// sse12_shuffle - sse 1 & 2 shuffle instructions 
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+                         ValueType vt, string asm, PatFrag mem_frag,
+                         Domain d, bit IsConvertibleToThreeAddress = 0> {
+  def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
+                 (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
+                 [(set VR128:$dst, (vt (shufp:$src3
+                          VR128:$src1, (mem_frag addr:$src2))))], d>;
+  let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+    def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
+                   (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
+                   [(set VR128:$dst,
+                            (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+}
+
+let isAsmParserOnly = 1 in {
+  defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+             "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+             memopv4f32, SSEPackedSingle>, VEX_4V;
+  defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+             "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+             memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+  defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+                    "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                    memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+                    TB;
+  defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+                    "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+                    memopv2f64, SSEPackedDouble>, TB, OpSize;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+                                   PatFrag mem_frag, RegisterClass RC,
+                                   X86MemOperand x86memop, string asm,
+                                   Domain d> {
+    def rr : PI<opc, MRMSrcReg,
+                (outs RC:$dst), (ins RC:$src1, RC:$src2),
+                asm, [(set RC:$dst,
+                           (vt (OpNode RC:$src1, RC:$src2)))], d>;
+    def rm : PI<opc, MRMSrcMem,
+                (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+                asm, [(set RC:$dst,
+                           (vt (OpNode RC:$src1,
+                                       (mem_frag addr:$src2))))], d>;
+}
+
+let AddedComplexity = 10 in {
+  let isAsmParserOnly = 1 in {
+    defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+          VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedSingle>, VEX_4V;
+    defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+          VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedDouble>, OpSize, VEX_4V;
+    defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+          VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedSingle>, VEX_4V;
+    defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+          VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedDouble>, OpSize, VEX_4V;
+
+    defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+          VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedSingle>, VEX_4V;
+    defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+          VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedDouble>, OpSize, VEX_4V;
+    defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+          VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedSingle>, VEX_4V;
+    defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+          VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                         SSEPackedDouble>, OpSize,
VEX_4V;
+  }
+
+  let Constraints = "$src1 = $dst" in {
+    defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+          VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedSingle>, TB;
+    defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+          VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedDouble>, TB, OpSize;
+    defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+          VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedSingle>, TB;
+    defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+          VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+                         SSEPackedDouble>, TB, OpSize;
+  } // Constraints = "$src1 = $dst"
+} // AddedComplexity
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 sign mask extraction
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+                                Domain d> {
+  def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+              [(set GR32:$dst, (Int RC:$src))], d>;
+}
+
+// Mask creation
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+                                     SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+                                     SSEPackedDouble>, TB, OpSize;
+
+let isAsmParserOnly = 1 in {
+  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+                                        "movmskps", SSEPackedSingle>, VEX;
+  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+                                        "movmskpd", SSEPackedDouble>, OpSize,
+                                        VEX;
+  // FIXME: merge with multiclass above when the intrinsics come.
+  def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+             "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+  def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+             "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+             VEX;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
+//===----------------------------------------------------------------------===//
+
+// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
+// names that start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+    canFoldAsLoad = 1 in {
+  // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+                 [(set FR32:$dst, fp32imm0)]>,
+                 Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+                 [(set FR64:$dst, fpimm0)]>,
+                 Requires<[HasSSE2]>, TB, OpSize;
}
-// Arithmetic
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded.
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+                     "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+                     "movapd\t{$src, $dst|$dst, $src}", []>;
+}
-/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded.
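The sign-mask multiclass above yields MOVMSKPS/MOVMSKPD, which gather the sign bit of each packed element into the low bits of a GR32. A short C++ sketch of the semantics, assuming the standard SSE1 intrinsic:

#include <xmmintrin.h>
#include <cstdio>

int main() {
  // Element signs, from element 3 down to element 0: 1, 0, 1, 0.
  __m128 v = _mm_set_ps(-4.0f, 3.0f, -2.0f, 1.0f);
  int mask = _mm_movemask_ps(v);   // movmskps: bit i = sign bit of element i
  printf("0x%x\n", mask);          // prints 0xa (binary 1010)
  return 0;
}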
+let canFoldAsLoad = 1, isReMaterializable = 1 in { +def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; +def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Logical Instructions +//===----------------------------------------------------------------------===// + +/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops +/// +multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, + SDNode OpNode> { + let isAsmParserOnly = 1 in { + defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, + FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V; + + defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, + FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V; + } + + let Constraints = "$src1 = $dst" in { + defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32, + f32, f128mem, memopfsf32, SSEPackedSingle>, TB; + + defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64, + f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize; + } +} + +// Alias bitwise logical operations using SSE logical ops on packed FP values. +let mayLoad = 0 in { + defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>; + defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>; + defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>; +} + +let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in + defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>; + +/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// +multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, + SDNode OpNode, int HasPat = 0, + list<list<dag>> Pattern = []> { + let isAsmParserOnly = 1, Pattern = []<dag> in { + defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f128mem, + !if(HasPat, Pattern[0], // rr + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, + VR128:$src2)))]), + !if(HasPat, Pattern[2], // rm + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]), 0>, + VEX_4V; + + defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f128mem, + !if(HasPat, Pattern[1], // rr + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 + VR128:$src2))))]), + !if(HasPat, Pattern[3], // rm + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]), 0>, + OpSize, VEX_4V; + } + let Constraints = "$src1 = $dst" in { + defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f128mem, + !if(HasPat, Pattern[0], // rr + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, + VR128:$src2)))]), + !if(HasPat, Pattern[2], // rm + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))])>, TB; + + defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f128mem, + !if(HasPat, Pattern[1], // rr + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 + VR128:$src2))))]), + !if(HasPat, Pattern[3], // rm + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + 
(memopv2i64 addr:$src2)))])>, + TB, OpSize; + } +} + +/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms +/// +let isAsmParserOnly = 1 in { +multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> { + defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, + !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V; + + defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, + !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V; +} +} + +// AVX 256-bit packed logical ops forms +defm VAND : sse12_fp_packed_logical_y<0x54, "and">; +defm VOR : sse12_fp_packed_logical_y<0x56, "or">; +defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">; +let isCommutable = 0 in + defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">; + +defm AND : sse12_fp_packed_logical<0x54, "and", and>; +defm OR : sse12_fp_packed_logical<0x56, "or", or>; +defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; +let isCommutable = 0 in + defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [ + // single r+r + [(set VR128:$dst, (v2i64 (and (xor VR128:$src1, + (bc_v2i64 (v4i32 immAllOnesV))), + VR128:$src2)))], + // double r+r + [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), + (bc_v2i64 (v2f64 VR128:$src2))))], + // single r+m + [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)), + (bc_v2i64 (v4i32 immAllOnesV))), + (memopv2i64 addr:$src2))))], + // double r+m + [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), + (memopv2i64 addr:$src2)))]]>; + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Arithmetic Instructions +//===----------------------------------------------------------------------===// + +/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and +/// vector forms. +/// +/// In addition, we also have a special variant of the scalar form here to +/// represent the associated intrinsic operation. This form is unlike the +/// plain scalar form, in that it takes an entire vector (instead of a scalar) +/// and leaves the top elements unmodified (therefore these cannot be commuted). +/// +/// These three forms can each be reg+reg or reg+mem. 
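To make the comment above concrete: the intrinsic scalar forms operate on whole vectors, combining only element 0 and passing elements 1-3 through from the first operand, which is why they are never marked commutable. A C++ sketch, assuming the standard SSE1 intrinsics:

#include <xmmintrin.h>
#include <cstdio>

int main() {
  __m128 a = _mm_set_ps(4.0f, 3.0f, 2.0f, 1.0f);     // elements 3..0
  __m128 b = _mm_set_ps(40.0f, 30.0f, 20.0f, 10.0f);
  float ab[4], ba[4];
  _mm_storeu_ps(ab, _mm_add_ss(a, b));  // {11, 2, 3, 4}: upper lanes from a
  _mm_storeu_ps(ba, _mm_add_ss(b, a));  // {11, 20, 30, 40}: upper lanes from b
  printf("%g vs %g\n", ab[1], ba[1]);   // element 0 commutes; the rest do not
  return 0;
}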
+/// +multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, + bit Is2Addr = 1> { + defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), + OpNode, FR32, f32mem, Is2Addr>, XS; + defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), + OpNode, FR64, f64mem, Is2Addr>, XD; +} + +multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, + bit Is2Addr = 1> { + let mayLoad = 0 in { + defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, + v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB; + defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, + v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize; + } +} + +multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr, + SDNode OpNode> { + let mayLoad = 0 in { + defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256, + v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB; + defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256, + v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize; + } +} + +multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, + bit Is2Addr = 1> { + defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, + !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS; + defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, + !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD; +} + +multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr, + bit Is2Addr = 1> { + defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128, + !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32, + SSEPackedSingle, Is2Addr>, TB; + + defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128, + !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64, + SSEPackedDouble, Is2Addr>, TB, OpSize; +} + +// Binary Arithmetic instructions +let isAsmParserOnly = 1 in { + defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; + defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; + + let isCommutable = 0 in { + defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; + defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; + defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V; + defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; + } +} + +let Constraints = "$src1 = $dst" in { + defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>, + basic_sse12_fp_binop_p<0x58, "add", fadd>, + basic_sse12_fp_binop_s_int<0x58, "add">; + defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>, + basic_sse12_fp_binop_p<0x59, "mul", fmul>, + basic_sse12_fp_binop_s_int<0x59, "mul">; + + let isCommutable = 0 in { + defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>, + basic_sse12_fp_binop_p<0x5C, "sub", fsub>, + basic_sse12_fp_binop_s_int<0x5C, "sub">; + defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>, + 
basic_sse12_fp_binop_p<0x5E, "div", fdiv>, + basic_sse12_fp_binop_s_int<0x5E, "div">; + defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_s_int<0x5F, "max">, + basic_sse12_fp_binop_p_int<0x5F, "max">; + defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>, + basic_sse12_fp_binop_p<0x5D, "min", X86fmin>, + basic_sse12_fp_binop_s_int<0x5D, "min">, + basic_sse12_fp_binop_p_int<0x5D, "min">; + } +} + +/// Unop Arithmetic /// In addition, we also have a special variant of the scalar form here to /// represent the associated intrinsic operation. This form is unlike the /// plain scalar form, in that it takes an entire vector (instead of a /// scalar) and leaves the top elements undefined. /// /// And, we have a special variant form for a full-vector intrinsic form. -/// -/// These four forms can each have a reg or a mem operand, so there are a -/// total of eight "instructions". -/// -multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F64Int, - Intrinsic V2F64Int, - bit Commutable = 0> { - // Scalar operation, reg. + +/// sse1_fp_unop_s - SSE1 unops in scalar form. +multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, + SDNode OpNode, Intrinsic F32Int> { + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode FR32:$src))]>; + // For scalar unary operations, fold a load into the operation + // only in OptForSize mode. It eliminates an instruction, but it also + // eliminates a whole-register clobber (the load), so it introduces a + // partial register update condition. + def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS, + Requires<[HasSSE1, OptForSize]>; + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (F32Int VR128:$src))]>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src), + !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (F32Int sse_load_f32:$src))]>; +} + +/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form. +multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr, + SDNode OpNode, Intrinsic F32Int> { + def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, XS, Requires<[HasAVX, OptForSize]>; + def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, ssmem:$src2), + !strconcat(!strconcat("v", OpcodeStr), + "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; +} + +/// sse1_fp_unop_p - SSE1 unops in packed form. 
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> { + def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>; + def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>; +} + +/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form. +multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> { + def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>; + def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>; +} + +/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. +multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr, + Intrinsic V4F32Int> { + def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (V4F32Int VR128:$src))]>; + def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>; +} + + +/// sse2_fp_unop_s - SSE2 unops in scalar form. +multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, + SDNode OpNode, Intrinsic F64Int> { def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode FR64:$src))]> { - let isCommutable = Commutable; - } - - // Scalar operation, mem. - def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), + [(set FR64:$dst, (OpNode FR64:$src))]>; + // See the comments in sse1_fp_unop_s for why this is OptForSize. + def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set FR64:$dst, (OpNode (load addr:$src)))]>; + [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD, + Requires<[HasSSE2, OptForSize]>; + def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (F64Int VR128:$src))]>; + def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), + !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (F64Int sse_load_f64:$src))]>; +} + +/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. +multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr, + SDNode OpNode, Intrinsic F64Int> { + def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst), + (ins FR64:$src1, f64mem:$src2), + !strconcat(OpcodeStr, + "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>; + def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; + def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, sdmem:$src2), + !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>; +} - // Vector operation, reg. +/// sse2_fp_unop_p - SSE2 unops in vector forms. 
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, + SDNode OpNode> { def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> { - let isCommutable = Commutable; - } - - // Vector operation, mem. + [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>; def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>; +} - // Intrinsic operation, reg. - def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int VR128:$src))]> { - let isCommutable = Commutable; - } - - // Intrinsic operation, mem. - def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src), - !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (F64Int sse_load_f64:$src))]>; +/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms. +multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> { + def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>; + def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), + !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), + [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>; +} - // Vector intrinsic operation, reg +/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms. +multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr, + Intrinsic V2F64Int> { def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (V2F64Int VR128:$src))]> { - let isCommutable = Commutable; - } - - // Vector intrinsic operation, mem + [(set VR128:$dst, (V2F64Int VR128:$src))]>; def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + // Square root. + defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, + sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, + VEX_4V; + + defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>, + sse2_fp_unop_p<0x51, "vsqrt", fsqrt>, + sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>, + sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>, + VEX; + + // Reciprocal approximations. Note that these typically require refinement + // in order to obtain suitable precision. + defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt, + int_x86_sse_rsqrt_ss>, VEX_4V; + defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>, + sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX; + + defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, + VEX_4V; + defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>, + sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX; +} + // Square root. 
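To quantify the refinement note above: rsqrtps/rcpps return roughly 12 bits of precision, and one Newton-Raphson step is the customary fix-up, recovering roughly 22-23 bits. A C++ sketch, assuming SSE1 intrinsics:

#include <xmmintrin.h>

// One Newton-Raphson step on top of the hardware estimate:
//   y1 = y0 * (1.5 - 0.5 * x * y0 * y0)
static inline __m128 rsqrt_refined(__m128 x) {
  __m128 y   = _mm_rsqrt_ps(x);                       // ~12-bit estimate
  __m128 xy2 = _mm_mul_ps(_mm_mul_ps(_mm_set1_ps(0.5f), x),
                          _mm_mul_ps(y, y));
  return _mm_mul_ps(y, _mm_sub_ps(_mm_set1_ps(1.5f), xy2));
}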
-defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt, - int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>; +defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>, + sse1_fp_unop_p<0x51, "sqrt", fsqrt>, + sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>, + sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>, + sse2_fp_unop_p<0x51, "sqrt", fsqrt>, + sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>; + +// Reciprocal approximations. Note that these typically require refinement +// in order to obtain suitable precision. +defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>, + sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>, + sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>; +defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, + sse1_fp_unop_p<0x53, "rcp", X86frcp>, + sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>; // There is no f64 version of the reciprocal approximation instructions. -// Logical -let Constraints = "$src1 = $dst" in { - let isCommutable = 1 in { - def ANDPDrr : PDI<0x54, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 VR128:$src2))))]>; - def ORPDrr : PDI<0x56, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (or (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 VR128:$src2))))]>; - def XORPDrr : PDI<0x57, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (xor (bc_v2i64 (v2f64 VR128:$src1)), - (bc_v2i64 (v2f64 VR128:$src2))))]>; - } +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Non-temporal stores +//===----------------------------------------------------------------------===// - def ANDPDrm : PDI<0x54, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def ORPDrm : PDI<0x56, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (or (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def XORPDrm : PDI<0x57, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (xor (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def ANDNPDrr : PDI<0x55, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (bc_v2i64 (v2f64 VR128:$src2))))]>; - def ANDNPDrm : PDI<0x55, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (memopv2i64 addr:$src2)))]>; +let isAsmParserOnly = 1 in { + def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX; + def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX; + + let ExeDomain = SSEPackedInt in + def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), 
+ "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX; + + let AddedComplexity = 400 in { // Prefer non-temporal versions + def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)]>, VEX; + def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)]>, VEX; + def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)]>, VEX; + let ExeDomain = SSEPackedInt in + def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)]>, VEX; + + def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)]>, VEX; + def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)]>, VEX; + def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)]>, VEX; + let ExeDomain = SSEPackedInt in + def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)]>, VEX; + } } -let Constraints = "$src1 = $dst" in { - def CMPPDrri : PDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, - VR128:$src, imm:$cc))]>; - def CMPPDrmi : PDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), - "cmp${cc}pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1, - (memop addr:$src), imm:$cc))]>; +def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; +def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>; + +let ExeDomain = SSEPackedInt in +def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>; + +let AddedComplexity = 400 in { // Prefer non-temporal versions +def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; +def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; + +def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; + +let ExeDomain = SSEPackedInt in +def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + 
"movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; + +// There is no AVX form for instructions below this point +def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; + +def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, + TB, Requires<[HasSSE2]>; - // Accept explicit immediate argument form instead of comparison code. -let isAsmParserOnly = 1 in { - def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), - "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; - def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), - "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; } +def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), + "movnti\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, + TB, Requires<[HasSSE2]>; + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Misc Instructions (No AVX form) +//===----------------------------------------------------------------------===// + +// Prefetch intrinsic. +def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), + "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; +def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), + "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>; +def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), + "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>; +def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), + "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; + +// Load, store, and memory fence +def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, + TB, Requires<[HasSSE1]>; + +// Alias instructions that map zero vector to pxor / xorp* for sse. +// We set canFoldAsLoad because this can be converted to a constant-pool +// load of an all-zeros value if folding it would be beneficial. +// FIXME: Change encoding to pseudo! 
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+    isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+               [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+               [(set VR128:$dst, (v4i32 immAllZerosV))]>;
}
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
-          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
-          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;

-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
-  def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
-                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
-                 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                 [(set VR128:$dst,
-                   (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
-  def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1,
-                         f128mem:$src2, i8imm:$src3),
-                 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                 [(set VR128:$dst,
-                   (v2f64 (shufp:$src3
-                           VR128:$src1, (memopv2f64 addr:$src2))))]>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;

-  let AddedComplexity = 10 in {
-    def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
-                  (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                  "unpckhpd\t{$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst,
-                    (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
-    def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
-                  (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                  "unpckhpd\t{$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst,
-                    (v2f64 (unpckh VR128:$src1,
-                                   (memopv2f64 addr:$src2))))]>;
-
-    def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
-                  (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                  "unpcklpd\t{$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst,
-                    (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
-    def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
-                  (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                  "unpcklpd\t{$src2, $dst|$dst, $src2}",
-                  [(set VR128:$dst,
-                    (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store MXCSR register
+//===----------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1 in {
+  def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+                      "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+  def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+                      "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+}
+
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;

//===---------------------------------------------------------------------===//
-// SSE integer instructions
-let ExeDomain = SSEPackedInt in {
+// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+let isAsmParserOnly = 1 in {
+ 
let neverHasSideEffects = 1 in + def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; + + let canFoldAsLoad = 1, mayLoad = 1 in { + def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>, + VEX; + def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, + XS, VEX, Requires<[HasAVX]>; + } + + let mayStore = 1 in { + def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX; + def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, + XS, VEX, Requires<[HasAVX]>; + } +} -// Move Instructions let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, mayLoad = 1 in + +let canFoldAsLoad = 1, mayLoad = 1 in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>; -let mayStore = 1 in -def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", - [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; -let canFoldAsLoad = 1, mayLoad = 1 in def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, XS, Requires<[HasSSE2]>; -let mayStore = 1 in +} + +let mayStore = 1 in { +def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, XS, Requires<[HasSSE2]>; +} // Intrinsic forms of MOVDQU load and store +let isAsmParserOnly = 1 in { +let canFoldAsLoad = 1 in +def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>, + XS, VEX, Requires<[HasAVX]>; +def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, + XS, VEX, Requires<[HasAVX]>; +} + let canFoldAsLoad = 1 in def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", @@ -2019,55 +2021,72 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, XS, Requires<[HasSSE2]>; -let Constraints = "$src1 = $dst" in { +} // ExeDomain = SSEPackedInt + +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Arithmetic Instructions +//===---------------------------------------------------------------------===// + +let ExeDomain = SSEPackedInt in { // SSE integer instructions multiclass 
PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, - bit Commutable = 0> { + bit IsCommutable = 0, bit Is2Addr = 1> { + let isCommutable = IsCommutable in def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> { - let isCommutable = Commutable; - } + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId VR128:$src1, - (bitconvert (memopv2i64 - addr:$src2))))]>; + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId VR128:$src1, + (bitconvert (memopv2i64 addr:$src2))))]>; } multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm, - string OpcodeStr, - Intrinsic IntId, Intrinsic IntId2> { + string OpcodeStr, Intrinsic IntId, + Intrinsic IntId2, bit Is2Addr = 1> { def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId VR128:$src1, + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>; def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; + (ins VR128:$src1, i32i8imm:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>; } /// PDI_binop_rm - Simple SSE2 binary operator. 
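/// Both the plain and the intrinsic multiclasses here use !if(Is2Addr, ...)
/// to select the asm operand string, so a single definition can cover the
/// destructive SSE form and the three-operand AVX form. Instantiated for
/// paddb/vpaddb, the two branches print roughly as:
///
///   paddb  %xmm1, %xmm0           ; Is2Addr = 1: $dst is also $src1
///   vpaddb %xmm2, %xmm1, %xmm0    ; Is2Addr = 0: separate $dst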
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Commutable = 0> { + ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> { + let isCommutable = IsCommutable in def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpVT (OpNode VR128:$src1, (bitconvert (memopv2i64 addr:$src2)))))]>; } @@ -2077,64 +2096,177 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, /// to collapse (bitconvert VT to VT) into its operand. /// multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode, - bit Commutable = 0> { + bit IsCommutable = 0, bit Is2Addr = 1> { + let isCommutable = IsCommutable in def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> { - let isCommutable = Commutable; - } + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>; def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, - (memopv2i64 addr:$src2)))]>; + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>; } -} // Constraints = "$src1 = $dst" } // ExeDomain = SSEPackedInt // 128-bit Integer Arithmetic -defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>; -defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>; -defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>; -defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>; - -defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>; -defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>; -defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>; -defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V; +defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V; +defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V; +defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V; +defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V; +defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V; +defm 
VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V; +defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V; +defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V; + +// Intrinsic forms +defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>, + VEX_4V; +defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>, + VEX_4V; +defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>, + VEX_4V; +defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>, + VEX_4V; +defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>, + VEX_4V; +defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>, + VEX_4V; +defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>, + VEX_4V; +defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>, + VEX_4V; +defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>, + VEX_4V; +defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>, + VEX_4V; +defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>, + VEX_4V; +defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>, + VEX_4V; +defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>, + VEX_4V; +defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>, + VEX_4V; +defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>, + VEX_4V; +defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>, + VEX_4V; +defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>, + VEX_4V; +defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>, + VEX_4V; +defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>, + VEX_4V; +} +let Constraints = "$src1 = $dst" in { +defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>; +defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>; +defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>; +defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>; +defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>; defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>; defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>; defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>; defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>; +// Intrinsic forms defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>; defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>; defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>; defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>; - -defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>; - +defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>; +defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>; +defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>; +defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>; defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>; -defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>; +defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>; defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>; - defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>; +defm 
PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>; +defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>; +defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>; +defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>; +defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>; +defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>; +defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; + +} // Constraints = "$src1 = $dst" -defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>; -defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>; +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Logical Instructions +//===---------------------------------------------------------------------===// +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", + int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>, + VEX_4V; +defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", + int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>, + VEX_4V; +defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", + int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>, + VEX_4V; + +defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", + int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>, + VEX_4V; +defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", + int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>, + VEX_4V; +defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", + int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>, + VEX_4V; + +defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", + int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>, + VEX_4V; +defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", + int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>, + VEX_4V; + +defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V; +defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V; +defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V; -defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>; -defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>; -defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>; -defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>; -defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; +let ExeDomain = SSEPackedInt in { + let neverHasSideEffects = 1 in { + // 128-bit logical shifts. + def VPSLLDQri : PDIi8<0x73, MRM7r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V; + def VPSRLDQri : PDIi8<0x73, MRM3r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V; + // PSRADQri doesn't exist in SSE[1-3]. 
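// For reference: unlike the element-wise shifts above, pslldq/psrldq shift
// the full 128-bit register by a *byte* count, e.g.
//
//   pslldq $2, %xmm0    ; shift %xmm0 left by 2 bytes (16 bits)
//
// The BYTE_imm patterns further below convert the intrinsics' bit count
// into this byte count.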
+ } + def VPANDNrr : PDI<0xDF, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + VR128:$src2)))]>, VEX_4V; + def VPANDNrm : PDI<0xDF, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + (memopv2i64 addr:$src2))))]>, + VEX_4V; +} +} +let Constraints = "$src1 = $dst" in { defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", int_x86_sse2_psll_w, int_x86_sse2_pslli_w>; defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", @@ -2154,17 +2286,34 @@ defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", int_x86_sse2_psra_d, int_x86_sse2_psrai_d>; -// 128-bit logical shifts. -let Constraints = "$src1 = $dst", neverHasSideEffects = 1, - ExeDomain = SSEPackedInt in { - def PSLLDQri : PDIi8<0x73, MRM7r, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), - "pslldq\t{$src2, $dst|$dst, $src2}", []>; - def PSRLDQri : PDIi8<0x73, MRM3r, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), - "psrldq\t{$src2, $dst|$dst, $src2}", []>; - // PSRADQri doesn't exist in SSE[1-3]. +defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; +defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>; +defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>; + +let ExeDomain = SSEPackedInt in { + let neverHasSideEffects = 1 in { + // 128-bit logical shifts. + def PSLLDQri : PDIi8<0x73, MRM7r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "pslldq\t{$src2, $dst|$dst, $src2}", []>; + def PSRLDQri : PDIi8<0x73, MRM3r, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + "psrldq\t{$src2, $dst|$dst, $src2}", []>; + // PSRADQri doesn't exist in SSE[1-3]. 
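// For reference: pandn computes dst = ~src1 & src2, e.g.
//
//   pandn %xmm1, %xmm0    ; %xmm0 = ~%xmm0 & %xmm1
//
// The operands are not interchangeable and only src2 may come from memory,
// which is why the pandn defs below are written out directly rather than
// through the commutable PDI_binop_rm_v2i64 multiclass.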
+ } + def PANDNrr : PDI<0xDF, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + VR128:$src2)))]>; + + def PANDNrm : PDI<0xDF, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), + (memopv2i64 addr:$src2))))]>; } +} // Constraints = "$src1 = $dst" let Predicates = [HasSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), @@ -2185,32 +2334,33 @@ let Predicates = [HasSSE2] in { (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; } -// Logical -defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; -defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>; -defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>; - -let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in { - def PANDNrr : PDI<0xDF, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - VR128:$src2)))]>; +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Comparison Instructions +//===---------------------------------------------------------------------===// - def PANDNrm : PDI<0xDF, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1), - (memopv2i64 addr:$src2))))]>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1, + 0>, VEX_4V; + defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1, + 0>, VEX_4V; + defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1, + 0>, VEX_4V; + defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0, + 0>, VEX_4V; + defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0, + 0>, VEX_4V; + defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0, + 0>, VEX_4V; } -// SSE2 Integer comparison -defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>; -defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>; -defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>; -defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>; -defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>; -defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>; +let Constraints = "$src1 = $dst" in { + defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>; + defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>; + defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>; + defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>; + defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>; + defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>; +} // Constraints = "$src1 = $dst" def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), (PCMPEQBrr VR128:$src1, VR128:$src2)>; @@ -2238,94 +2388,147 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)), def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), (PCMPGTDrm VR128:$src1, addr:$src2)>; +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Pack Instructions 
+//===---------------------------------------------------------------------===// -// Pack instructions +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128, + 0, 0>, VEX_4V; +defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, + 0, 0>, VEX_4V; +defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128, + 0, 0>, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>; defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>; defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; +} // Constraints = "$src1 = $dst" + +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Shuffle Instructions +//===---------------------------------------------------------------------===// let ExeDomain = SSEPackedInt in { +multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag, + PatFrag bc_frag> { +def ri : Ii8<0x70, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1, + (undef))))]>; +def mi : Ii8<0x70, MRMSrcMem, + (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (vt (pshuf_frag:$src2 + (bc_frag (memopv2i64 addr:$src1)), + (undef))))]>; +} +} // ExeDomain = SSEPackedInt -// Shuffle and unpack instructions -let AddedComplexity = 5 in { -def PSHUFDri : PDIi8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (pshufd:$src2 - VR128:$src1, (undef))))]>; -def PSHUFDmi : PDIi8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v4i32 (pshufd:$src2 - (bc_v4i32 (memopv2i64 addr:$src1)), - (undef))))]>; -} - -// SSE2 with ImmT == Imm8 and XS prefix. -def PSHUFHWri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1, - (undef))))]>, - XS, Requires<[HasSSE2]>; -def PSHUFHWmi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshufhw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, - XS, Requires<[HasSSE2]>; - -// SSE2 with ImmT == Imm8 and XD prefix. -def PSHUFLWri : Ii8<0x70, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), - "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1, - (undef))))]>, - XD, Requires<[HasSSE2]>; -def PSHUFLWmi : Ii8<0x70, MRMSrcMem, - (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2), - "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, (v8i16 (pshuflw:$src2 - (bc_v8i16 (memopv2i64 addr:$src1)), - (undef))))]>, - XD, Requires<[HasSSE2]>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + let AddedComplexity = 5 in + defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize, + VEX; + // SSE2 with ImmT == Imm8 and XS prefix. 
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS, + VEX; -let Constraints = "$src1 = $dst" in { - def PUNPCKLBWrr : PDI<0x60, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLBWrm : PDI<0x60, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpcklbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2))))]>; - def PUNPCKLWDrr : PDI<0x61, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", + // SSE2 with ImmT == Imm8 and XD prefix. + defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD, + VEX; +} + +let Predicates = [HasSSE2] in { + let AddedComplexity = 5 in + defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize; + + // SSE2 with ImmT == Imm8 and XS prefix. + defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS; + + // SSE2 with ImmT == Imm8 and XD prefix. + defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD; +} + +//===---------------------------------------------------------------------===// +// SSE2 - Packed Integer Unpack Instructions +//===---------------------------------------------------------------------===// + +let ExeDomain = SSEPackedInt in { +multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt, + PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> { + def rr : PDI<opc, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>; + def rm : PDI<opc, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (unp_frag VR128:$src1, + (bc_frag (memopv2i64 + addr:$src2))))]>; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8, + 0>, VEX_4V; + defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16, + 0>, VEX_4V; + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32, + 0>, VEX_4V; + + /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. 
+ def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLWDrm : PDI<0x61, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpcklwd\t{$src2, $dst|$dst, $src2}", + (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V; + def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2))))]>; - def PUNPCKLDQrr : PDI<0x62, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", + (v2i64 (unpckl VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; + + defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8, + 0>, VEX_4V; + defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16, + 0>, VEX_4V; + defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32, + 0>, VEX_4V; + + /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. + def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>; - def PUNPCKLDQrm : PDI<0x62, MRMSrcMem, + (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V; + def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckldq\t{$src2, $dst|$dst, $src2}", + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set VR128:$dst, - (unpckl VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>; + (v2i64 (unpckh VR128:$src1, + (memopv2i64 addr:$src2))))]>, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { + defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>; + defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>; + defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>; + + /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. 
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "punpcklqdq\t{$src2, $dst|$dst, $src2}", @@ -2338,39 +2541,12 @@ let Constraints = "$src1 = $dst" in { (v2i64 (unpckl VR128:$src1, (memopv2i64 addr:$src2))))]>; - def PUNPCKHBWrr : PDI<0x68, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHBWrm : PDI<0x68, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhbw\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v16i8 (memopv2i64 addr:$src2))))]>; - def PUNPCKHWDrr : PDI<0x69, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHWDrm : PDI<0x69, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhwd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v8i16 (memopv2i64 addr:$src2))))]>; - def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>; - def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "punpckhdq\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckh VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>; + defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>; + defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>; + defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>; + + /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. 
 def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                        "punpckhqdq\t{$src2, $dst|$dst, $src2}",
@@ -2384,102 +2560,117 @@ let Constraints = "$src1 = $dst" in {
                         (memopv2i64 addr:$src2))))]>;
 }

-// Extract / Insert
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+  def rri : Ii8<0xC4, MRMSrcReg,
+       (outs VR128:$dst), (ins VR128:$src1,
+        GR32:$src2, i32i8imm:$src3),
+       !if(Is2Addr,
+           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+       [(set VR128:$dst,
+         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+  def rmi : Ii8<0xC4, MRMSrcMem,
+       (outs VR128:$dst), (ins VR128:$src1,
+        i16mem:$src2, i32i8imm:$src3),
+       !if(Is2Addr,
+           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+       [(set VR128:$dst,
+         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+                    imm:$src3))]>;
+}
+
+// Extract
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+                    (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+                                                imm:$src2))]>, OpSize, VEX;
 def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
                                                 imm:$src2))]>;
-let Constraints = "$src1 = $dst" in {
-  def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
-                       (outs VR128:$dst), (ins VR128:$src1,
-                        GR32:$src2, i32i8imm:$src3),
-                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(set VR128:$dst,
-                         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
-  def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1,
-                        i16mem:$src2, i32i8imm:$src3),
-                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(set VR128:$dst,
-                         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
-                                    imm:$src3))]>;
-}

-// Mask creation
-def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "pmovmskb\t{$src, $dst|$dst, $src}",
-                     [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+// Insert
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+  defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;

-// Conditional store
-let Uses = [EDI] in
-def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
-                     "maskmovdqu\t{$mask, $src|$src, $mask}",
-                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
-
-let Uses = [RDI] in
-def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
-                       "maskmovdqu\t{$mask, $src|$src, $mask}",
-                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+let Constraints = "$src1 = $dst" in
+  defm PINSRW : sse2_pinsrw, TB, OpSize;

 } // ExeDomain = SSEPackedInt

-// Non-temporal stores
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                        "movntpd\t{$src, $dst|$dst, $src}",
-                        [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                        "movntdq\t{$src, $dst|$dst, $src}",
-                        [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr_Int  :   I<0xC3, MRMDestMem, (outs), (ins
i32mem:$dst, GR32:$src), - "movnti\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>, - TB, Requires<[HasSSE2]>; +//===---------------------------------------------------------------------===// +// SSE2 - Packed Mask Creation +//===---------------------------------------------------------------------===// -let AddedComplexity = 400 in { // Prefer non-temporal versions -def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>; +let ExeDomain = SSEPackedInt in { -let ExeDomain = SSEPackedInt in -def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; -} +let isAsmParserOnly = 1 in +def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; +def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; -// Flush cache -def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), - "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>, - TB, Requires<[HasSSE2]>; +} // ExeDomain = SSEPackedInt -// Load, store, and memory fence -def LFENCE : I<0xAE, MRM_E8, (outs), (ins), - "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>; -def MFENCE : I<0xAE, MRM_F0, (outs), (ins), - "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>; +//===---------------------------------------------------------------------===// +// SSE2 - Conditional Store +//===---------------------------------------------------------------------===// -// Pause. This "instruction" is encoded as "rep; nop", so even though it -// was introduced with SSE2, it's backward compatible. -def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP; +let ExeDomain = SSEPackedInt in { -//TODO: custom lower this so as to never even generate the noop -def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), - (i8 0)), (NOOP)>; -def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>; -def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>; -def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), - (i8 1)), (MFENCE)>; +let isAsmParserOnly = 1 in { +let Uses = [EDI] in +def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), + (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX; +let Uses = [RDI] in +def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), + (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX; +} -// Alias instructions that map zero vector to pxor / xorp* for sse. -// We set canFoldAsLoad because this can be converted to a constant-pool -// load of an all-ones value if folding it would be beneficial. -let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in - // FIXME: Change encoding to pseudo. 
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; +let Uses = [EDI] in +def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>; +let Uses = [RDI] in +def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), + "maskmovdqu\t{$mask, $src|$src, $mask}", + [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>; + +} // ExeDomain = SSEPackedInt + +//===---------------------------------------------------------------------===// +// SSE2 - Move Doubleword +//===---------------------------------------------------------------------===// +// Move Int Doubleword to Packed Double Int +let isAsmParserOnly = 1 in { +def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (scalar_to_vector GR32:$src)))]>, VEX; +def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, + VEX; +} def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2489,6 +2680,18 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>; + +// Move Int Doubleword to Single Scalar +let isAsmParserOnly = 1 in { +def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX; + +def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>, + VEX; +} def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>; @@ -2497,20 +2700,18 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; -// SSE2 instructions with XS prefix -def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "movq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, - Requires<[HasSSE2]>; -def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), - "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))), addr:$dst)]>; - -def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; - +// Move Packed Doubleword Int to Packed Double Int +let isAsmParserOnly = 1 in { +def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), + (iPTR 0)))]>, VEX; +def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, VR128:$src), + "movd\t{$src, $dst|$dst, $src}", + [(store (i32 (vector_extract (v4i32 VR128:$src), + (iPTR 0))), addr:$dst)]>, VEX; +} def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2520,6 +2721,15 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, 
VR128:$src), [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)]>; +// Move Scalar Single to Double Int +let isAsmParserOnly = 1 in { +def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX; +def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX; +} def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>; @@ -2527,25 +2737,38 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; -// Store / copy lower 64-bits of a XMM register. -def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), - "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; - // movd / movq to XMM register zero-extends +let AddedComplexity = 15, isAsmParserOnly = 1 in { +def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector GR32:$src)))))]>, + VEX; +def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only + [(set VR128:$dst, (v2i64 (X86vzmovl + (v2i64 (scalar_to_vector GR64:$src)))))]>, + VEX, VEX_W; +} let AddedComplexity = 15 in { def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))))]>; -// This is X86-64 only. 
 def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
-                       "mov{d|q}\t{$src, $dst|$dst, $src}",
+                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
                        [(set VR128:$dst,
                          (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
                                                    GR64:$src)))))]>;
 }

 let AddedComplexity = 20 in {
+let isAsmParserOnly = 1 in
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+                       "movd\t{$src, $dst|$dst, $src}",
+                       [(set VR128:$dst,
+                         (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+                                                   (loadi32 addr:$src))))))]>,
+                       VEX;
 def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
                        "movd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
@@ -2558,13 +2781,63 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
                                 (MOVZDI2PDIrm addr:$src)>;
 def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
                                 (MOVZDI2PDIrm addr:$src)>;
+}

+//===---------------------------------------------------------------------===//
+// SSE2 - Move Quadword
+//===---------------------------------------------------------------------===//
+
+// Move Quadword Int to Packed Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                    "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst,
+                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+                    VEX, Requires<[HasAVX]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                    "movq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst,
+                      (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+                    Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
+
+// Move Packed Quadword Int to Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+                      "movq\t{$src, $dst|$dst, $src}",
+                      [(store (i64 (vector_extract (v2i64 VR128:$src),
+                                    (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+                      "movq\t{$src, $dst|$dst, $src}",
+                      [(store (i64 (vector_extract (v2i64 VR128:$src),
+                                    (iPTR 0))), addr:$dst)]>;
+
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+          (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// Store / copy the lower 64 bits of an XMM register.
+let isAsmParserOnly = 1 in
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+                       "movq\t{$src, $dst|$dst, $src}",
+                       [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+                     "movq\t{$src, $dst|$dst, $src}",
+                     [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+                     "vmovq\t{$src, $dst|$dst, $src}",
+                     [(set VR128:$dst,
+                       (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+                                                 (loadi64 addr:$src))))))]>,
+                     XS, VEX, Requires<[HasAVX]>;
+
+let AddedComplexity = 20 in {
 def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "movq\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst,
                        (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
-                                                 (loadi64 addr:$src))))))]>, XS,
-                    Requires<[HasSSE2]>;
+                                                 (loadi64 addr:$src))))))]>,
+                     XS, Requires<[HasSSE2]>;

 def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
             (MOVZQI2PQIrm addr:$src)>;
@@ -2575,12 +2848,23 @@ def : Pat<(v2i64 (X86vzload addr:$src)),
             (MOVZQI2PQIrm addr:$src)>;

 // Moving from XMM to XMM and clearing the upper 64 bits. Note: there is a bug
 // in the IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
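// For reference, the zeroing behavior the X86vzmovl patterns here rely on:
//
//   movq %xmm1, %xmm0    ; %xmm0[63:0] = %xmm1[63:0], %xmm0[127:64] = 0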
+let isAsmParserOnly = 1, AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                        "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+                      XS, VEX, Requires<[HasAVX]>;
 let AddedComplexity = 15 in
 def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                     [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                       XS, Requires<[HasSSE2]>;

+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+                        "vmovq\t{$src, $dst|$dst, $src}",
+                    [(set VR128:$dst, (v2i64 (X86vzmovl
+                                              (loadv2i64 addr:$src))))]>,
+                      XS, VEX, Requires<[HasAVX]>;
 let AddedComplexity = 20 in {
 def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
@@ -2592,49 +2876,136 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
               (MOVZPQILo2PQIrm addr:$src)>;
 }

+// Instructions to match in the assembler
+let isAsmParserOnly = 1 in {
+// These instructions are in fact aliases to movd with a 64-bit operand
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+                      "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+                      "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+}
+
 // Instructions for the disassembler
 // xr = XMM register
 // xm = mem64

+let isAsmParserOnly = 1 in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                 "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
 def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  "movq\t{$src, $dst|$dst, $src}", []>, XS;

 //===---------------------------------------------------------------------===//
-// SSE3 Instructions
+// SSE2 - Misc Instructions
 //===---------------------------------------------------------------------===//

-// Move Instructions
-def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                      "movshdup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (v4f32 (movshdup
-                                                VR128:$src, (undef))))]>;
-def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                      "movshdup\t{$src, $dst|$dst, $src}",
-                      [(set VR128:$dst, (movshdup
-                                         (memopv4f32 addr:$src), (undef)))]>;
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+              TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
+//TODO: custom lower this so as to never even generate the noop
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+           (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+           (i8 1)), (MFENCE)>;
+
+// Alias instruction that maps an all-ones vector to pcmpeqd.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
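// For reference: opcode 0x76 is pcmpeqd, and comparing a register with
// itself makes every element compare equal, so the result is all-ones no
// matter what the register held before:
//
//   pcmpeqd %xmm0, %xmm0    ; %xmm0 = all ones, independent of input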
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, + isCodeGenOnly = 1, ExeDomain = SSEPackedInt in + // FIXME: Change encoding to pseudo. + def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", + [(set VR128:$dst, (v4i32 immAllOnesV))]>; -def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (v4f32 (movsldup +//===---------------------------------------------------------------------===// +// SSE3 - Conversion Instructions +//===---------------------------------------------------------------------===// + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; +} + +def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; +def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; + +//===---------------------------------------------------------------------===// +// SSE3 - Move Instructions +//===---------------------------------------------------------------------===// + +// Replicate Single FP +multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> { +def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (v4f32 (rep_frag VR128:$src, (undef))))]>; -def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movsldup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (movsldup +def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (rep_frag (memopv4f32 addr:$src), (undef)))]>; +} -def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movddup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; -def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movddup\t{$src, $dst|$dst, $src}", +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; +defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; +} +defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">; +defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">; + +// Replicate Double FP +multiclass sse3_replicate_dfp<string OpcodeStr> { +def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; +def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), (undef))))]>; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VMOVDDUP : 
sse3_replicate_dfp<"vmovddup">, VEX; +defm MOVDDUP : sse3_replicate_dfp<"movddup">; + +// Move Unaligned Integer +let isAsmParserOnly = 1 in + def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vlddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; +def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "lddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; +// Several Move patterns let AddedComplexity = 5 in { def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; @@ -2646,52 +3017,98 @@ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; } -// Arithmetic -let Constraints = "$src1 = $dst" in { - def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "addsubps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1, - VR128:$src2))]>; - def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "addsubps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1, - (memop addr:$src2)))]>; - def ADDSUBPDrr : S3I<0xD0, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "addsubpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1, - VR128:$src2))]>; - def ADDSUBPDrm : S3I<0xD0, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "addsubpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1, - (memop addr:$src2)))]>; +// vector_shuffle v1, <undef> <1, 1, 3, 3> +let AddedComplexity = 15 in +def : Pat<(v4i32 (movshdup VR128:$src, (undef))), + (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; +let AddedComplexity = 20 in +def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; + +// vector_shuffle v1, <undef> <0, 0, 2, 2> +let AddedComplexity = 15 in + def : Pat<(v4i32 (movsldup VR128:$src, (undef))), + (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; +let AddedComplexity = 20 in + def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), + (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; + +//===---------------------------------------------------------------------===// +// SSE3 - Arithmetic +//===---------------------------------------------------------------------===// + +multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> { + def rr : I<0xD0, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (Int VR128:$src1, + VR128:$src2))]>; + def rm : I<0xD0, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (Int VR128:$src1, + (memop addr:$src2)))]>; + } -def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "lddqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; +let isAsmParserOnly = 1, Predicates = [HasAVX], + ExeDomain = 
SSEPackedDouble in { + defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD, + VEX_4V; + defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize, + VEX_4V; +} +let Constraints = "$src1 = $dst", Predicates = [HasSSE3], + ExeDomain = SSEPackedDouble in { + defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD; + defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize; +} + +//===---------------------------------------------------------------------===// +// SSE3 Instructions +//===---------------------------------------------------------------------===// // Horizontal ops -class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId> +class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>; -class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId> +class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + !if(Is2Addr, !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>; -class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId> +class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>; -class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId> +class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; + def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; + def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; + def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; + def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; + def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; + def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; + def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; +} + let Constraints = "$src1 = $dst" in { def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>; def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>; @@ -2703,35 +3120,14 @@ let Constraints = "$src1 = $dst" in { def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; } -// Thread synchronization -def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", - [(int_x86_sse3_monitor EAX, 
ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", - [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; - -// vector_shuffle v1, <undef> <1, 1, 3, 3> -let AddedComplexity = 15 in -def : Pat<(v4i32 (movshdup VR128:$src, (undef))), - (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in -def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>; - -// vector_shuffle v1, <undef> <0, 0, 2, 2> -let AddedComplexity = 15 in - def : Pat<(v4i32 (movsldup VR128:$src, (undef))), - (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>; -let AddedComplexity = 20 in - def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))), - (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; - //===---------------------------------------------------------------------===// -// SSSE3 Instructions +// SSSE3 - Packed Absolute Instructions //===---------------------------------------------------------------------===// -/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. -multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128> { +/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. +multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, + PatFrag mem_frag64, PatFrag mem_frag128, + Intrinsic IntId64, Intrinsic IntId128> { def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, (IntId64 VR64:$src))]>; @@ -2739,7 +3135,7 @@ multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr, def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR64:$dst, - (IntId64 (bitconvert (memopv8i8 addr:$src))))]>; + (IntId64 (bitconvert (mem_frag64 addr:$src))))]>; def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -2752,240 +3148,203 @@ multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 - (bitconvert (memopv16i8 addr:$src))))]>, OpSize; + (bitconvert (mem_frag128 addr:$src))))]>, OpSize; } -/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16. 
-multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, (IntId64 VR64:$src))]>; - - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins i64mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, - (IntId64 - (bitconvert (memopv4i16 addr:$src))))]>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8, + int_x86_ssse3_pabs_b, + int_x86_ssse3_pabs_b_128>, VEX; + defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16, + int_x86_ssse3_pabs_w, + int_x86_ssse3_pabs_w_128>, VEX; + defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32, + int_x86_ssse3_pabs_d, + int_x86_ssse3_pabs_d_128>, VEX; +} - def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (IntId128 VR128:$src))]>, - OpSize; +defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8, + int_x86_ssse3_pabs_b, + int_x86_ssse3_pabs_b_128>; +defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16, + int_x86_ssse3_pabs_w, + int_x86_ssse3_pabs_w_128>; +defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32, + int_x86_ssse3_pabs_d, + int_x86_ssse3_pabs_d_128>; - def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, - (IntId128 - (bitconvert (memopv8i16 addr:$src))))]>, OpSize; -} +//===---------------------------------------------------------------------===// +// SSSE3 - Packed Binary Operator Instructions +//===---------------------------------------------------------------------===// -/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32. -multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128> { +/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, + PatFrag mem_frag64, PatFrag mem_frag128, + Intrinsic IntId64, Intrinsic IntId128, + bit Is2Addr = 1> { + let isCommutable = 1 in def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, (IntId64 VR64:$src))]>; - + (ins VR64:$src1, VR64:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>; def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins i64mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR64:$dst, - (IntId64 - (bitconvert (memopv2i32 addr:$src))))]>; + (ins VR64:$src1, i64mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR64:$dst, + (IntId64 VR64:$src1, + (bitconvert (memopv8i8 addr:$src2))))]>; + let isCommutable = 1 in def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (IntId128 VR128:$src))]>, - OpSize; - + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, + OpSize; def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, - (IntId128 - (bitconvert (memopv4i32 addr:$src))))]>, OpSize; + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb", - int_x86_ssse3_pabs_b, - int_x86_ssse3_pabs_b_128>; -defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw", - int_x86_ssse3_pabs_w, - int_x86_ssse3_pabs_w_128>; -defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd", - int_x86_ssse3_pabs_d, - int_x86_ssse3_pabs_d_128>; - -/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8. 
-let Constraints = "$src1 = $dst" in { - multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128, - bit Commutable = 0> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> { - let isCommutable = Commutable; - } - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv8i8 addr:$src2))))]>; - - def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; - } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let isCommutable = 0 in { + defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_w, + int_x86_ssse3_phadd_w_128, 0>, VEX_4V; + defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32, + int_x86_ssse3_phadd_d, + int_x86_ssse3_phadd_d_128, 0>, VEX_4V; + defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_sw, + int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; + defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_w, + int_x86_ssse3_phsub_w_128, 0>, VEX_4V; + defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32, + int_x86_ssse3_phsub_d, + int_x86_ssse3_phsub_d_128, 0>, VEX_4V; + defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_sw, + int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; + defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8, + int_x86_ssse3_pmadd_ub_sw, + int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; + defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8, + int_x86_ssse3_pshuf_b, + int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; + defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8, + int_x86_ssse3_psign_b, + int_x86_ssse3_psign_b_128, 0>, VEX_4V; + defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16, + int_x86_ssse3_psign_w, + int_x86_ssse3_psign_w_128, 0>, VEX_4V; + defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32, + int_x86_ssse3_psign_d, + int_x86_ssse3_psign_d_128, 0>, VEX_4V; } - -/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16. 
-let Constraints = "$src1 = $dst" in { - multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128, - bit Commutable = 0> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> { - let isCommutable = Commutable; - } - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv4i16 addr:$src2))))]>; - - def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv8i16 addr:$src2))))]>, OpSize; - } +defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16, + int_x86_ssse3_pmul_hr_sw, + int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; } -/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32. -let Constraints = "$src1 = $dst" in { - multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128, - bit Commutable = 0> { - def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> { - let isCommutable = Commutable; - } - def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR64:$dst, - (IntId64 VR64:$src1, - (bitconvert (memopv2i32 addr:$src2))))]>; - - def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv4i32 addr:$src2))))]>, OpSize; - } +// None of these have i8 immediate fields. 
+let ImmT = NoImm, Constraints = "$src1 = $dst" in { +let isCommutable = 0 in { + defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_w, + int_x86_ssse3_phadd_w_128>; + defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32, + int_x86_ssse3_phadd_d, + int_x86_ssse3_phadd_d_128>; + defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_sw, + int_x86_ssse3_phadd_sw_128>; + defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_w, + int_x86_ssse3_phsub_w_128>; + defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32, + int_x86_ssse3_phsub_d, + int_x86_ssse3_phsub_d_128>; + defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_sw, + int_x86_ssse3_phsub_sw_128>; + defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8, + int_x86_ssse3_pmadd_ub_sw, + int_x86_ssse3_pmadd_ub_sw_128>; + defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8, + int_x86_ssse3_pshuf_b, + int_x86_ssse3_pshuf_b_128>; + defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8, + int_x86_ssse3_psign_b, + int_x86_ssse3_psign_b_128>; + defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16, + int_x86_ssse3_psign_w, + int_x86_ssse3_psign_w_128>; + defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32, + int_x86_ssse3_psign_d, + int_x86_ssse3_psign_d_128>; +} +defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16, + int_x86_ssse3_pmul_hr_sw, + int_x86_ssse3_pmul_hr_sw_128>; } -let ImmT = NoImm in { // None of these have i8 immediate fields. -defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw", - int_x86_ssse3_phadd_w, - int_x86_ssse3_phadd_w_128>; -defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd", - int_x86_ssse3_phadd_d, - int_x86_ssse3_phadd_d_128>; -defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw", - int_x86_ssse3_phadd_sw, - int_x86_ssse3_phadd_sw_128>; -defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw", - int_x86_ssse3_phsub_w, - int_x86_ssse3_phsub_w_128>; -defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd", - int_x86_ssse3_phsub_d, - int_x86_ssse3_phsub_d_128>; -defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw", - int_x86_ssse3_phsub_sw, - int_x86_ssse3_phsub_sw_128>; -defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", - int_x86_ssse3_pmadd_ub_sw, - int_x86_ssse3_pmadd_ub_sw_128>; -defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", - int_x86_ssse3_pmul_hr_sw, - int_x86_ssse3_pmul_hr_sw_128, 1>; - -defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", - int_x86_ssse3_pshuf_b, - int_x86_ssse3_pshuf_b_128>; -defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb", - int_x86_ssse3_psign_b, - int_x86_ssse3_psign_b_128>; -defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw", - int_x86_ssse3_psign_w, - int_x86_ssse3_psign_w_128>; -defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd", - int_x86_ssse3_psign_d, - int_x86_ssse3_psign_d_128>; -} - -// palignr patterns. 
-let Constraints = "$src1 = $dst" in { - def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>; - def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>; - - def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, OpSize; - def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, OpSize; +def : Pat<(X86pshufb VR128:$src, VR128:$mask), + (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; +def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), + (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; + +//===---------------------------------------------------------------------===// +// SSSE3 - Packed Align Instruction Patterns +//===---------------------------------------------------------------------===// + +multiclass sse3_palign<string asm, bit Is2Addr = 1> { + def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>; + def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>; + + def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; + def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PALIGN : sse3_palign<"palignr">; + let AddedComplexity = 5 in { def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)), @@ -2996,10 +3355,6 @@ def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)), (PALIGNR64rr VR64:$src2, VR64:$src1, (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)), (PALIGNR64rr VR64:$src2, VR64:$src1, (SHUFFLE_get_palign_imm VR64:$src3))>, @@ -3027,10 +3382,15 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), Requires<[HasSSSE3]>; } -def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; -def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; +//===---------------------------------------------------------------------===// +// SSSE3 Misc Instructions 
+//===---------------------------------------------------------------------===// + +// Thread synchronization +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", + [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", + [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; //===---------------------------------------------------------------------===// // Non-Instruction Patterns @@ -3311,287 +3671,9 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst), (MOVUPSmr addr:$dst, VR128:$src)>; //===----------------------------------------------------------------------===// -// SSE4.1 Instructions +// SSE4.1 - Packed Move with Sign/Zero Extend //===----------------------------------------------------------------------===// -multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, - string OpcodeStr, - Intrinsic V4F32Int, - Intrinsic V2F64Int> { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def PSr_Int : SS4AIi8<opcps, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>, - OpSize; - - // Vector intrinsic operation, mem - def PSm_Int : Ii8<opcps, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>, - TA, OpSize, - Requires<[HasSSE41]>; - - // Vector intrinsic operation, reg - def PDr_Int : SS4AIi8<opcpd, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>, - OpSize; - - // Vector intrinsic operation, mem - def PDm_Int : SS4AIi8<opcpd, MRMSrcMem, - (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), - !strconcat(OpcodeStr, - "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128:$dst, - (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>, - OpSize; -} - -let Constraints = "$src1 = $dst" in { -multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, - string OpcodeStr, - Intrinsic F32Int, - Intrinsic F64Int> { - // Intrinsic operation, reg. - def SSr_Int : SS4AIi8<opcss, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; - - // Intrinsic operation, mem. - def SSm_Int : SS4AIi8<opcss, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, - OpSize; - - // Intrinsic operation, reg. - def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize; - - // Intrinsic operation, mem. 
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, - OpSize; -} -} - -// FP round - roundss, roundps, roundsd, roundpd -defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", - int_x86_sse41_round_ps, int_x86_sse41_round_pd>; -defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", - int_x86_sse41_round_ss, int_x86_sse41_round_sd>; - -// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. -multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, - Intrinsic IntId128> { - def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize; - def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src), - !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), - [(set VR128:$dst, - (IntId128 - (bitconvert (memopv8i16 addr:$src))))]>, OpSize; -} - -defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", - int_x86_sse41_phminposuw>; - -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; - } -} - -defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", - int_x86_sse41_pcmpeqq, 1>; -defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", - int_x86_sse41_packusdw, 0>; -defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", - int_x86_sse41_pminsb, 1>; -defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", - int_x86_sse41_pminsd, 1>; -defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", - int_x86_sse41_pminud, 1>; -defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", - int_x86_sse41_pminuw, 1>; -defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", - int_x86_sse41_pmaxsb, 1>; -defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", - int_x86_sse41_pmaxsd, 1>; -defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", - int_x86_sse41_pmaxud, 1>; -defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", - int_x86_sse41_pmaxuw, 1>; - -defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>; - -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), - (PCMPEQQrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), - (PCMPEQQrm VR128:$src1, addr:$src2)>; - -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT, - SDNode OpNode, Intrinsic IntId128, - bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode (OpVT VR128:$src1), - VR128:$src2))]>, OpSize { - let isCommutable = Commutable; - } - def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, 
VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize; - def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, (memop addr:$src2)))]>, - OpSize; - } -} - -/// SS48I_binop_rm - Simple SSE41 binary operator. -let Constraints = "$src1 = $dst" in { -multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Commutable = 0> { - def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (OpNode VR128:$src1, - (bc_v4i32 (memopv2i64 addr:$src2))))]>, - OpSize; -} -} - -defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>; - -/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>, - OpSize { - let isCommutable = Commutable; - } - def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>, - OpSize; - } -} - -defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", - int_x86_sse41_blendps, 0>; -defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", - int_x86_sse41_blendpd, 0>; -defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", - int_x86_sse41_pblendw, 0>; -defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", - int_x86_sse41_dpps, 1>; -defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", - int_x86_sse41_dppd, 1>; -defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", - int_x86_sse41_mpsadbw, 0>; - - -/// SS41I_ternary_int - SSE 4.1 ternary operator -let Uses = [XMM0], Constraints = "$src1 = $dst" in { - multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> { - def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, - "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"), - [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>, - OpSize; - - def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, - "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"), - [(set VR128:$dst, - (IntId VR128:$src1, - (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize; - } -} - -defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; -defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; -defm 
PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; - - multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> { def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), @@ -3604,6 +3686,21 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, + VEX; +defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, + VEX; +defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>, + VEX; +defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>, + VEX; +defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>, + VEX; +defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, + VEX; +} + defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; @@ -3655,6 +3752,17 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, + VEX; +defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>, + VEX; +defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>, + VEX; +defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, + VEX; +} + defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; @@ -3685,6 +3793,12 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, + VEX; +defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, + VEX; +} defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; @@ -3699,6 +3813,9 @@ def : Pat<(int_x86_sse41_pmovzxbq (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Extract Instructions +//===----------------------------------------------------------------------===// /// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { @@ -3718,6 +3835,9 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> { // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; + defm PEXTRB : SS41I_extract8<0x14, "pextrb">; @@ -3733,6 +3853,9 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> { // (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRW : SS41I_extract16<0x15, 
"vpextrw">, VEX; + defm PEXTRW : SS41I_extract16<0x15, "pextrw">; @@ -3752,8 +3875,31 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; + defm PEXTRD : SS41I_extract32<0x16, "pextrd">; +/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination +multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> { + def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst), + (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set GR64:$dst, + (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W; + def mr : SS4AIi8<opc, MRMDestMem, (outs), + (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(store (extractelt (v2i64 VR128:$src1), imm:$src2), + addr:$dst)]>, OpSize, REX_W; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W; + +defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; /// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory /// destination @@ -3773,6 +3919,8 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> { addr:$dst)]>, OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; // Also match an EXTRACTPS store when the store is done as f32 instead of i32. @@ -3782,78 +3930,530 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, Requires<[HasSSE41]>; -let Constraints = "$src1 = $dst" in { - multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize; - def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), - imm:$src3))]>, OpSize; - } +//===----------------------------------------------------------------------===// +// SSE4.1 - Insert Instructions +//===----------------------------------------------------------------------===// + +multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize; + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), + imm:$src3))]>, OpSize; } -defm PINSRB : SS41I_insert8<0x20, "pinsrb">; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; +let Constraints = "$src1 = $dst" 
in + defm PINSRB : SS41I_insert8<0x20, "pinsrb">; + +multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, + OpSize; + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), + imm:$src3)))]>, OpSize; +} -let Constraints = "$src1 = $dst" in { - multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, GR32:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, - OpSize; - def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), - imm:$src3)))]>, OpSize; - } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PINSRD : SS41I_insert32<0x22, "pinsrd">; + +multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>, + OpSize; + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2), + imm:$src3)))]>, OpSize; } -defm PINSRD : SS41I_insert32<0x22, "pinsrd">; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W; +let Constraints = "$src1 = $dst" in + defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; // insertps has a few different modes, there's the first two here below which // are optimized inserts that won't zero arbitrary elements in the destination // vector. The next one matches the intrinsic and could zero arbitrary elements // in the target vector. 
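//
// For reference, the non-zeroing form is what compilers emit for the SSE4.1
// C intrinsic _mm_insert_ps; a minimal hedged sketch, assuming <smmintrin.h>
// is available (the function name is illustrative):
//
//   #include <smmintrin.h>
//   // imm8 0x20 = (src element 0 << 6) | (dst element 2 << 4) | zero mask 0,
//   // i.e. copy b[0] into a[2] without clearing any other lane of a.
//   __m128 insert_lane2(__m128 a, __m128 b) {
//     return _mm_insert_ps(a, b, 0x20);
//   }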
-let Constraints = "$src1 = $dst" in { - multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> { - def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, +multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>, OpSize; - def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), - !strconcat(OpcodeStr, - "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), - [(set VR128:$dst, - (X86insrtps VR128:$src1, - (v4f32 (scalar_to_vector (loadf32 addr:$src2))), - imm:$src3))]>, OpSize; - } + def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (X86insrtps VR128:$src1, + (v4f32 (scalar_to_vector (loadf32 addr:$src2))), + imm:$src3))]>, OpSize; } -defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Round Instructions +//===----------------------------------------------------------------------===// + +multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, + string OpcodeStr, + Intrinsic V4F32Int, + Intrinsic V2F64Int> { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def PSr_Int : SS4AIi8<opcps, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>, + OpSize; + + // Vector intrinsic operation, mem + def PSm_Int : Ii8<opcps, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>, + TA, OpSize, + Requires<[HasSSE41]>; + + // Vector intrinsic operation, reg + def PDr_Int : SS4AIi8<opcpd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>, + OpSize; + + // Vector intrinsic operation, mem + def PDm_Int : SS4AIi8<opcpd, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, + (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>, + OpSize; +} + +multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd, + string OpcodeStr> { + // Intrinsic operation, reg. 
+ // Vector intrinsic operation, reg + def PSr : SS4AIi8<opcps, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, OpSize; + + // Vector intrinsic operation, mem + def PSm : Ii8<opcps, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, TA, OpSize, Requires<[HasSSE41]>; + + // Vector intrinsic operation, reg + def PDr : SS4AIi8<opcpd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, OpSize; + + // Vector intrinsic operation, mem + def PDm : SS4AIi8<opcpd, MRMSrcMem, + (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2), + !strconcat(OpcodeStr, + "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, OpSize; +} + +multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd, + string OpcodeStr, + Intrinsic F32Int, + Intrinsic F64Int, bit Is2Addr = 1> { + // Intrinsic operation, reg. + def SSr_Int : SS4AIi8<opcss, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; + + // Intrinsic operation, mem. + def SSm_Int : SS4AIi8<opcss, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, + OpSize; + + // Intrinsic operation, reg. + def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; + + // Intrinsic operation, mem. + def SDm_Int : SS4AIi8<opcsd, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, + OpSize; +} + +multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd, + string OpcodeStr> { + // Intrinsic operation, reg. + def SSr : SS4AIi8<opcss, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; + + // Intrinsic operation, mem. + def SSm : SS4AIi8<opcss, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; + + // Intrinsic operation, reg. + def SDr : SS4AIi8<opcsd, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; + + // Intrinsic operation, mem. 
+ def SDm : SS4AIi8<opcsd, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3), + !strconcat(OpcodeStr, + "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; +} + +// FP round - roundss, roundps, roundsd, roundpd +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + // Intrinsic form + defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", + int_x86_sse41_round_ps, int_x86_sse41_round_pd>, + VEX; + defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", + int_x86_sse41_round_ss, int_x86_sse41_round_sd, + 0>, VEX_4V; + // Instructions for the assembler + defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX; + defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V; +} + +defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", + int_x86_sse41_round_ps, int_x86_sse41_round_pd>; +let Constraints = "$src1 = $dst" in +defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", + int_x86_sse41_round_ss, int_x86_sse41_round_sd>; + +//===----------------------------------------------------------------------===// +// SSE4.1 - Misc Instructions +//===----------------------------------------------------------------------===// + +// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. +multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr, + Intrinsic IntId128> { + def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize; + def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst, + (IntId128 + (bitconvert (memopv8i16 addr:$src))))]>, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in +defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw", + int_x86_sse41_phminposuw>, VEX; +defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", + int_x86_sse41_phminposuw>; + +/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize; + def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, + 0>, VEX_4V; + defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq, + 0>, VEX_4V; + defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, + 0>, VEX_4V; + defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, + 0>, VEX_4V; + defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud, + 0>, VEX_4V; + defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw, + 0>, VEX_4V; + defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb, + 0>, VEX_4V; + defm VPMAXSD : 
SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd, + 0>, VEX_4V; + defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud, + 0>, VEX_4V; + defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw, + 0>, VEX_4V; + defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, + 0>, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { + let isCommutable = 0 in + defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; + defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>; + defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; + defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; + defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; + defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; + defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; + defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; + defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; + defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; +} + +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (PCMPEQQrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (PCMPEQQrm VR128:$src1, addr:$src2)>; + +/// SS48I_binop_rm - Simple SSE41 binary operator. +multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>, + OpSize; + def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (OpNode VR128:$src1, + (bc_v4i32 (memopv2i64 addr:$src2))))]>, + OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; + +/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate +multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>, + OpSize; + def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3), + !if(Is2Addr, + !strconcat(OpcodeStr, + "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>, + OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + let isCommutable = 0 in { + defm VBLENDPS : 
SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, + 0>, VEX_4V; + defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, + 0>, VEX_4V; + defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, + 0>, VEX_4V; + defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, + 0>, VEX_4V; + } + defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, + 0>, VEX_4V; + defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, + 0>, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { + let isCommutable = 0 in { + defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>; + defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>; + defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>; + defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>; + } + defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>; + defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>; +} + +/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> { + def rr : I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + + def rm : I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, VR128:$src3), + !strconcat(OpcodeStr, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; + } +} + +defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">; +defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">; +defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">; + +/// SS41I_ternary_int - SSE 4.1 ternary operator +let Uses = [XMM0], Constraints = "$src1 = $dst" in { + multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> { + def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, + "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"), + [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>, + OpSize; + + def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !strconcat(OpcodeStr, + "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"), + [(set VR128:$dst, + (IntId VR128:$src1, + (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize; + } +} + +defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; +defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; + // ptest instruction we'll lower to this in X86ISelLowering primarily from // the intel intrinsic that corresponds to this. 
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, + OpSize, VEX; +def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, + OpSize, VEX; +} + let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest \t{$src2, $src1|$src1, $src2}", @@ -3865,43 +4465,207 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in +def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, + OpSize, VEX; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, OpSize; - //===----------------------------------------------------------------------===// -// SSE4.2 Instructions +// SSE4.2 - Compare Instructions //===----------------------------------------------------------------------===// /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; - } +multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, + OpSize; + def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, + 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), (PCMPGTQrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), (PCMPGTQrm VR128:$src1, addr:$src2)>; +//===----------------------------------------------------------------------===// +// SSE4.2 - String/text Processing Instructions +//===----------------------------------------------------------------------===// + +// 
Packed Compare Implicit Length Strings, Return Mask +let Defs = [EFLAGS], usesCustomInserter = 1 in { + def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "#PCMPISTRM128rr PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; + def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "#PCMPISTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 + VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1, + Predicates = [HasAVX] in { + def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; +} + +let Defs = [XMM0, EFLAGS] in { + def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; +} + +// Packed Compare Explicit Length Strings, Return Mask +let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "#PCMPESTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; + + def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "#PCMPESTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, + OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX], + Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; +} + +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; +} + +// Packed Compare Implicit Length Strings, Return Index +let Defs = [ECX, EFLAGS] in { + multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> { + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), + (implicit EFLAGS)]>, OpSize; + def rm : SS42AI<0x63, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), 
imm:$src3)), + (implicit EFLAGS)]>, OpSize; + } +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">, + VEX; +defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">, + VEX; +defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">, + VEX; +defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">, + VEX; +defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">, + VEX; +defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">, + VEX; +} + +defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; +defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; +defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; +defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; +defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; +defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; + +// Packed Compare Explicit Length Strings, Return Index +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { + multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + [(set ECX, + (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; + } +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">, + VEX; +defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">, + VEX; +defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">, + VEX; +defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">, + VEX; +defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">, + VEX; +defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">, + VEX; +} + +defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; +defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; +defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; +defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; +defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; +defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; + +//===----------------------------------------------------------------------===// +// SSE4.2 - CRC Instructions +//===----------------------------------------------------------------------===// + +// No CRC instructions have AVX equivalents + // crc intrinsic instruction // This set of instructions are only rm, the only difference is the size // of r and m. @@ -3969,133 +4733,52 @@ let Constraints = "$src1 = $dst" in { REX_W; } -// String/text processing instructions. 
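As the comment above notes, the CRC instructions come only in reg/mem forms that differ in operand size. Through the SSE4.2 intrinsics they are typically driven in a loop; a short sketch (illustrative only, assumes -msse4.2):

    #include <nmmintrin.h>   // SSE4.2 intrinsics
    #include <stddef.h>
    #include <stdint.h>

    // Accumulate a CRC-32C (Castagnoli) checksum one byte at a time;
    // the u16/u32/u64 forms change only the operand size, as noted above.
    uint32_t crc32c(uint32_t crc, const unsigned char *p, size_t n) {
      for (size_t i = 0; i != n; ++i)
        crc = _mm_crc32_u8(crc, p[i]);
      return crc;
    }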
-let Defs = [EFLAGS], usesCustomInserter = 1 in { -def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "#PCMPISTRM128rr PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, - imm:$src3))]>, OpSize; -def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "#PCMPISTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2), - imm:$src3))]>, OpSize; -} - -let Defs = [XMM0, EFLAGS] in { -def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; -def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; -} +//===----------------------------------------------------------------------===// +// AES-NI Instructions +//===----------------------------------------------------------------------===// -let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { -def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; - -def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "#PCMPESTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, - OpSize; +multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, + OpSize; + def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (IntId128 VR128:$src1, + (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { -def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; -def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; +// Perform One Round of an AES Encryption/Decryption Flow +let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { + defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", + int_x86_aesni_aesenc, 0>, VEX_4V; + defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", + int_x86_aesni_aesenclast, 0>, VEX_4V; + defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec", + int_x86_aesni_aesdec, 0>, VEX_4V; + defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast", + int_x86_aesni_aesdeclast, 0>, VEX_4V; } -let Defs = [ECX, EFLAGS] in { - multiclass SS42AI_pcmpistri<Intrinsic IntId128> { - def rr : SS42AI<0x63, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, (IntId128 
VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, OpSize; - def rm : SS42AI<0x63, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, OpSize; - } -} - -defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>; -defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>; -defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>; -defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>; -defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>; -defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>; - -let Defs = [ECX, EFLAGS] in { -let Uses = [EAX, EDX] in { - multiclass SS42AI_pcmpestri<Intrinsic IntId128> { - def rr : SS42AI<0x61, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; - def rm : SS42AI<0x61, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, - (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; - } -} -} - -defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>; -defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>; -defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>; -defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; -defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; -defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; - -//===----------------------------------------------------------------------===// -// AES-NI Instructions -//===----------------------------------------------------------------------===// - let Constraints = "$src1 = $dst" in { - multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, - OpSize { - let isCommutable = Commutable; - } - def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2), - !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), - [(set VR128:$dst, - (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; - } + defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", + int_x86_aesni_aesenc>; + defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", + int_x86_aesni_aesenclast>; + defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", + int_x86_aesni_aesdec>; + defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", + int_x86_aesni_aesdeclast>; } -defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", - int_x86_aesni_aesenc>; -defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", - int_x86_aesni_aesenclast>; -defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", - int_x86_aesni_aesdec>; -defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", - int_x86_aesni_aesdeclast>; - def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), (AESENCrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), @@ -4113,13 +4796,27 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), def : Pat<(v2i64 (int_x86_aesni_aesdeclast 
VR128:$src1, (memop addr:$src2))), (AESDECLASTrm VR128:$src1, addr:$src2)>; +// Perform the AES InvMixColumn Transformation +let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { + def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1), + "vaesimc\t{$src1, $dst|$dst, $src1}", + [(set VR128:$dst, + (int_x86_aesni_aesimc VR128:$src1))]>, + OpSize, VEX; + def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1), + "vaesimc\t{$src1, $dst|$dst, $src1}", + [(set VR128:$dst, + (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>, + OpSize, VEX; +} def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc VR128:$src1))]>, OpSize; - def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", @@ -4127,6 +4824,22 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>, OpSize; +// AES Round Key Generation Assist +let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { + def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i8imm:$src2), + "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + OpSize, VEX; + def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1, i8imm:$src2), + "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), + imm:$src2))]>, + OpSize, VEX; +} def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index a9681e6670fc..633ddd49d74d 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -30,7 +30,7 @@ class X86MCCodeEmitter : public MCCodeEmitter { MCContext &Ctx; bool Is64BitMode; public: - X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) + X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit) : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) { Is64BitMode = is64Bit; } @@ -38,17 +38,18 @@ public: ~X86MCCodeEmitter() {} unsigned getNumFixupKinds() const { - return 4; + return 5; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[] = { { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel }, { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel } }; - + if (Kind < FirstTargetFixupKind) return MCCodeEmitter::getFixupKindInfo(Kind); @@ -56,16 +57,38 @@ public: "Invalid kind!"); return Infos[Kind - FirstTargetFixupKind]; } - + static unsigned GetX86RegNum(const MCOperand &MO) { return X86RegisterInfo::getX86RegNum(MO.getReg()); } - + + // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range + // 0-7 and the difference between the 2 groups is given by the REX prefix. 
+ // In the VEX prefix, registers are seen sequencially from 0-15 and encoded + // in 1's complement form, example: + // + // ModRM field => XMM9 => 1 + // VEX.VVVV => XMM9 => ~9 + // + // See table 4-35 of Intel AVX Programming Reference for details. + static unsigned char getVEXRegisterEncoding(const MCInst &MI, + unsigned OpNum) { + unsigned SrcReg = MI.getOperand(OpNum).getReg(); + unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum)); + if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) || + (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15)) + SrcRegNum += 8; + + // The registers represented through VEX_VVVV should + // be encoded in 1's complement form. + return (~SrcRegNum) & 0xf; + } + void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const { OS << (char)C; ++CurByte; } - + void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte, raw_ostream &OS) const { // Output the constant in little endian byte order. @@ -75,38 +98,49 @@ public: } } - void EmitImmediate(const MCOperand &Disp, + void EmitImmediate(const MCOperand &Disp, unsigned ImmSize, MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const; - + inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) { assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!"); return RM | (RegOpcode << 3) | (Mod << 6); } - + void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld, unsigned &CurByte, raw_ostream &OS) const { EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS); } - + void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base, unsigned &CurByte, raw_ostream &OS) const { // SIB byte is in the same format as the ModRMByte. EmitByte(ModRMByte(SS, Index, Base), CurByte, OS); } - - + + void EmitMemModRMByte(const MCInst &MI, unsigned Op, - unsigned RegOpcodeField, - unsigned TSFlags, unsigned &CurByte, raw_ostream &OS, + unsigned RegOpcodeField, + uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const; - + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const; - + + void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, + const MCInst &MI, const TargetInstrDesc &Desc, + raw_ostream &OS) const; + + void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte, + int MemOperand, const MCInst &MI, + raw_ostream &OS) const; + + void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand, + const MCInst &MI, const TargetInstrDesc &Desc, + raw_ostream &OS) const; }; } // end anonymous namespace @@ -124,24 +158,23 @@ MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &, return new X86MCCodeEmitter(TM, Ctx, true); } - -/// isDisp8 - Return true if this signed displacement fits in a 8-bit -/// sign-extended field. +/// isDisp8 - Return true if this signed displacement fits in a 8-bit +/// sign-extended field. static bool isDisp8(int Value) { return Value == (signed char)Value; } /// getImmFixupKind - Return the appropriate fixup kind to use for an immediate /// in an instruction with the specified TSFlags. -static MCFixupKind getImmFixupKind(unsigned TSFlags) { +static MCFixupKind getImmFixupKind(uint64_t TSFlags) { unsigned Size = X86II::getSizeOfImm(TSFlags); bool isPCRel = X86II::isImmPCRel(TSFlags); - + switch (Size) { default: assert(0 && "Unknown immediate size"); case 1: return isPCRel ? 
MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1; + case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2; case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4; - case 2: assert(!isPCRel); return FK_Data_2; case 8: assert(!isPCRel); return FK_Data_8; } } @@ -162,29 +195,30 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, // If we have an immoffset, add it to the expression. const MCExpr *Expr = DispOp.getExpr(); - + // If the fixup is pc-relative, we need to bias the value to be relative to // the start of the field, not the end of the field. if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || FixupKind == MCFixupKind(X86::reloc_riprel_4byte) || FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load)) ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte)) + ImmOffset -= 2; if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) ImmOffset -= 1; - + if (ImmOffset) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), Ctx); - + // Emit a symbolic constant as a fixup and 4 zeros. Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); EmitConstant(0, Size, CurByte, OS); } - void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - unsigned TSFlags, unsigned &CurByte, + uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups) const{ const MCOperand &Disp = MI.getOperand(Op+3); @@ -192,43 +226,43 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, const MCOperand &Scale = MI.getOperand(Op+1); const MCOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); - + // Handle %rip relative addressing. if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode - assert(IndexReg.getReg() == 0 && Is64BitMode && - "Invalid rip-relative address"); + assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode"); + assert(IndexReg.getReg() == 0 && "Invalid rip-relative address"); EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); - + unsigned FixupKind = X86::reloc_riprel_4byte; - + // movq loads are handled with a special relocation form which allows the // linker to eliminate some loads for GOT references which end up in the // same linkage unit. if (MI.getOpcode() == X86::MOV64rm || MI.getOpcode() == X86::MOV64rm_TC) FixupKind = X86::reloc_riprel_4byte_movq_load; - + // rip-relative addressing is actually relative to the *next* instruction. // Since an immediate can follow the mod/rm byte for an instruction, this // means that we need to bias the immediate field of the instruction with // the size of the immediate field. If we have this case, add it into the // expression to emit. int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; - + EmitImmediate(Disp, 4, MCFixupKind(FixupKind), CurByte, OS, Fixups, -ImmSize); return; } - + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; - + // Determine whether a SIB byte is needed. - // If no BaseReg, issue a RIP relative instruction only if the MCE can + // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. if (// The SIB byte must be used if there is an index register. - IndexReg.getReg() == 0 && + IndexReg.getReg() == 0 && // The SIB byte must be used if the base is ESP/RSP/R12, all of which // encode to an R/M value of 4, which indicates that a SIB byte is // present. 
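Stripped of the RIP-relative and SIB special cases handled above, the core of this encoding logic is packing the 2-bit mod, 3-bit reg/opcode, and 3-bit r/m fields, then choosing the shortest displacement that round-trips. A standalone sketch (illustrative helper names, not the emitter's API):

    #include <stdint.h>

    // mod=0: no displacement, mod=1: disp8, mod=2: disp32 (register base).
    static uint8_t modRM(unsigned mod, unsigned regOp, unsigned rm) {
      return (uint8_t)((mod << 6) | (regOp << 3) | rm);
    }

    // True when the displacement survives a signed 8-bit round trip,
    // mirroring isDisp8() above.
    static bool fitsDisp8(int32_t disp) { return disp == (int8_t)disp; }

    // Prefer no displacement, then disp8, then disp32. (EBP/R13 as base
    // and ESP/R12 as r/m need the special handling in the real emitter.)
    static unsigned pickMod(int32_t disp) {
      if (disp == 0) return 0;
      return fitsDisp8(disp) ? 1 : 2;
    }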
@@ -242,7 +276,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } - + // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -251,24 +285,24 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS); return; } - + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. if (Disp.isImm() && isDisp8(Disp.getImm())) { EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS); EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); return; } - + // Otherwise, emit the most general non-SIB encoding: [REG+disp32] EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS); EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } - + // We need a SIB byte, so start by outputting the ModR/M byte first assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); - + bool ForceDisp32 = false; bool ForceDisp8 = false; if (BaseReg == 0) { @@ -294,13 +328,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Emit the normal disp32 encoding. EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); } - + // Calculate what the SS field value should be... static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 }; unsigned SS = SSTable[Scale.getImm()]; - + if (BaseReg == 0) { - // Handle the SIB byte for the case where there is no base, see Intel + // Handle the SIB byte for the case where there is no base, see Intel // Manual 2A, table 2-7. The displacement has already been output. unsigned IndexRegNo; if (IndexReg.getReg()) @@ -316,7 +350,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, IndexRegNo = 4; // For example [ESP+1*<noreg>+4] EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS); } - + // Do we need to output a displacement? if (ForceDisp8) EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups); @@ -324,26 +358,216 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); } +/// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix +/// called VEX. +void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + int MemOperand, const MCInst &MI, + const TargetInstrDesc &Desc, + raw_ostream &OS) const { + bool HasVEX_4V = false; + if (TSFlags & X86II::VEX_4V) + HasVEX_4V = true; + + // VEX_R: opcode externsion equivalent to REX.R in + // 1's complement (inverted) form + // + // 1: Same as REX_R=0 (must be 1 in 32-bit mode) + // 0: Same as REX_R=1 (64 bit mode only) + // + unsigned char VEX_R = 0x1; + + // VEX_X: equivalent to REX.X, only used when a + // register is used for index in SIB Byte. 
+ // + // 1: Same as REX.X=0 (must be 1 in 32-bit mode) + // 0: Same as REX.X=1 (64-bit mode only) + unsigned char VEX_X = 0x1; + + // VEX_B: + // + // 1: Same as REX_B=0 (ignored in 32-bit mode) + // 0: Same as REX_B=1 (64 bit mode only) + // + unsigned char VEX_B = 0x1; + + // VEX_W: opcode specific (use like REX.W, or used for + // opcode extension, or ignored, depending on the opcode byte) + unsigned char VEX_W = 0; + + // VEX_5M (VEX m-mmmmm field): + // + // 0b00000: Reserved for future use + // 0b00001: implied 0F leading opcode + // 0b00010: implied 0F 38 leading opcode bytes + // 0b00011: implied 0F 3A leading opcode bytes + // 0b00100-0b11111: Reserved for future use + // + unsigned char VEX_5M = 0x1; + + // VEX_4V (VEX vvvv field): a register specifier + // (in 1's complement form) or 1111 if unused. + unsigned char VEX_4V = 0xf; + + // VEX_L (Vector Length): + // + // 0: scalar or 128-bit vector + // 1: 256-bit vector + // + unsigned char VEX_L = 0; + + // VEX_PP: opcode extension providing equivalent + // functionality of a SIMD prefix + // + // 0b00: None + // 0b01: 66 + // 0b10: F3 + // 0b11: F2 + // + unsigned char VEX_PP = 0; + + // Encode the operand size opcode prefix as needed. + if (TSFlags & X86II::OpSize) + VEX_PP = 0x01; + + if (TSFlags & X86II::VEX_W) + VEX_W = 1; + + switch (TSFlags & X86II::Op0Mask) { + default: assert(0 && "Invalid prefix!"); + case X86II::T8: // 0F 38 + VEX_5M = 0x2; + break; + case X86II::TA: // 0F 3A + VEX_5M = 0x3; + break; + case X86II::TF: // F2 0F 38 + VEX_PP = 0x3; + VEX_5M = 0x2; + break; + case X86II::XS: // F3 0F + VEX_PP = 0x2; + break; + case X86II::XD: // F2 0F + VEX_PP = 0x3; + break; + case X86II::TB: // Bypass: Not used by VEX + case 0: + break; // No prefix! + } + + // Set the vector length to 256-bit if YMM0-YMM15 is used + for (unsigned i = 0; i != MI.getNumOperands(); ++i) { + if (!MI.getOperand(i).isReg()) + continue; + unsigned SrcReg = MI.getOperand(i).getReg(); + if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) + VEX_L = 1; + } + + unsigned NumOps = MI.getNumOperands(); + unsigned CurOp = 0; + + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: + NumOps = CurOp = X86::AddrNumOperands; + case X86II::MRMSrcMem: + case X86II::MRMSrcReg: + if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + + // CurOp and NumOps are equal when VEX_R represents a register used + // to index a memory destination (which is the last operand) + CurOp = (CurOp == NumOps) ? 
0 : CurOp+1; + + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + CurOp++; + } + + // If the last register should be encoded in the immediate field + // do not use any bit from VEX prefix to this register, ignore it + if (TSFlags & X86II::VEX_I8IMM) + NumOps--; + + for (; CurOp != NumOps; ++CurOp) { + const MCOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_B = 0x0; + if (!VEX_B && MO.isReg() && + ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) && + X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_X = 0x0; + } + break; + default: // MRMDestReg, MRM0r-MRM7r + if (MI.getOperand(CurOp).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + + CurOp++; + for (; CurOp != NumOps; ++CurOp) { + const MCOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && !HasVEX_4V && + X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_R = 0x0; + } + break; + assert(0 && "Not implemented!"); + } + + // Emit segment override opcode prefix as needed. + EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS); + + // VEX opcode prefix can have 2 or 3 bytes + // + // 3 bytes: + // +-----+ +--------------+ +-------------------+ + // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + // 2 bytes: + // +-----+ +-------------------+ + // | C5h | | R | vvvv | L | pp | + // +-----+ +-------------------+ + // + unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); + + if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix + EmitByte(0xC5, CurByte, OS); + EmitByte(LastByte | (VEX_R << 7), CurByte, OS); + return; + } + + // 3 byte VEX prefix + EmitByte(0xC4, CurByte, OS); + EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS); + EmitByte(LastByte | (VEX_W << 7), CurByte, OS); +} + /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand /// size, and 3) use of X86-64 extended registers. -static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, +static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, const TargetInstrDesc &Desc) { - // Pseudo instructions never have a rex byte. - if ((TSFlags & X86II::FormMask) == X86II::Pseudo) - return 0; - unsigned REX = 0; if (TSFlags & X86II::REX_W) - REX |= 1 << 3; - + REX |= 1 << 3; // set REX.W + if (MI.getNumOperands() == 0) return REX; - + unsigned NumOps = MI.getNumOperands(); // FIXME: MCInst should explicitize the two-addrness. bool isTwoAddr = NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; - + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; for (; i != NumOps; ++i) { @@ -353,34 +577,34 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue; // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything // that returns non-zero. - REX |= 0x40; + REX |= 0x40; // REX fixed encoding prefix break; } - + switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); case X86II::MRMSrcReg: if (MI.getOperand(0).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R i = isTwoAddr ? 
2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << 0; + REX |= 1 << 0; // set REX.B } break; case X86II::MRMSrcMem: { if (MI.getOperand(0).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R unsigned Bit = 0; i = isTwoAddr ? 2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << Bit; + REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1) Bit++; } } @@ -391,17 +615,17 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: case X86II::MRMDestMem: { - unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); + unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); i = isTwoAddr ? 1 : 0; if (NumOps > e && MI.getOperand(e).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R unsigned Bit = 0; for (; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << Bit; + REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1) Bit++; } } @@ -410,39 +634,40 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, default: if (MI.getOperand(0).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) - REX |= 1 << 0; + REX |= 1 << 0; // set REX.B i = isTwoAddr ? 2 : 1; for (unsigned e = NumOps; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R } break; } return REX; } -void X86MCCodeEmitter:: -EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl<MCFixup> &Fixups) const { - unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); - unsigned TSFlags = Desc.TSFlags; - - // Keep track of the current byte being emitted. - unsigned CurByte = 0; - - // FIXME: We should emit the prefixes in exactly the same order as GAS does, - // in order to provide diffability. - - // Emit the lock opcode prefix as needed. - if (TSFlags & X86II::LOCK) - EmitByte(0xF0, CurByte, OS); - - // Emit segment override opcode prefix as needed. +/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed +void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, + unsigned &CurByte, int MemOperand, + const MCInst &MI, + raw_ostream &OS) const { switch (TSFlags & X86II::SegOvrMask) { default: assert(0 && "Invalid segment!"); - case 0: break; // No segment override! + case 0: + // No segment override, check for explicit one on memory operand. + if (MemOperand != -1) { // If the instruction has a memory operand. 
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) { + default: assert(0 && "Unknown segment register!"); + case 0: break; + case X86::CS: EmitByte(0x2E, CurByte, OS); break; + case X86::SS: EmitByte(0x36, CurByte, OS); break; + case X86::DS: EmitByte(0x3E, CurByte, OS); break; + case X86::ES: EmitByte(0x26, CurByte, OS); break; + case X86::FS: EmitByte(0x64, CurByte, OS); break; + case X86::GS: EmitByte(0x65, CurByte, OS); break; + } + } + break; case X86II::FS: EmitByte(0x64, CurByte, OS); break; @@ -450,19 +675,36 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(0x65, CurByte, OS); break; } - +} + +/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode. +/// +/// MemOperand is the operand # of the start of a memory operand if present. If +/// Not present, it is -1. +void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + int MemOperand, const MCInst &MI, + const TargetInstrDesc &Desc, + raw_ostream &OS) const { + + // Emit the lock opcode prefix as needed. + if (TSFlags & X86II::LOCK) + EmitByte(0xF0, CurByte, OS); + + // Emit segment override opcode prefix as needed. + EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS); + // Emit the repeat opcode prefix as needed. if ((TSFlags & X86II::Op0Mask) == X86II::REP) EmitByte(0xF3, CurByte, OS); - + // Emit the operand size opcode prefix as needed. if (TSFlags & X86II::OpSize) EmitByte(0x66, CurByte, OS); - + // Emit the address size opcode prefix as needed. if (TSFlags & X86II::AdSize) EmitByte(0x67, CurByte, OS); - + bool Need0FPrefix = false; switch (TSFlags & X86II::Op0Mask) { default: assert(0 && "Invalid prefix!"); @@ -494,18 +736,18 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::DE: EmitByte(0xDE, CurByte, OS); break; case X86II::DF: EmitByte(0xDF, CurByte, OS); break; } - + // Handle REX prefix. // FIXME: Can this come before F2 etc to simplify emission? if (Is64BitMode) { if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) EmitByte(0x40 | REX, CurByte, OS); } - + // 0x0F escape code must be emitted just before the opcode. if (Need0FPrefix) EmitByte(0x0F, CurByte, OS); - + // FIXME: Pull this up into previous switch if REX can be moved earlier. switch (TSFlags & X86II::Op0Mask) { case X86II::TF: // F2 0F 38 @@ -516,8 +758,21 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(0x3A, CurByte, OS); break; } - +} + +void X86MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups) const { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = TII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + + // Pseudo instructions don't get encoded. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return; + // If this is a two-address instruction, skip one of the register operands. + // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) @@ -525,56 +780,85 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 --NumOps; - + + // Keep track of the current byte being emitted. + unsigned CurByte = 0; + + // Is this instruction encoded using the AVX VEX prefix? + bool HasVEXPrefix = false; + + // It uses the VEX.VVVV field? 
+ bool HasVEX_4V = false; + + if (TSFlags & X86II::VEX) + HasVEXPrefix = true; + if (TSFlags & X86II::VEX_4V) + HasVEX_4V = true; + + // Determine where the memory operand starts, if present. + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand != -1) MemoryOperand += CurOp; + + if (!HasVEXPrefix) + EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); + else + EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); + unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); - case X86II::Pseudo: return; // Pseudo instructions encode to nothing. + case X86II::Pseudo: + assert(0 && "Pseudo instruction shouldn't be emitted"); case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; - + case X86II::AddRegFrm: EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); break; - + case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp), GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); CurOp += 2; break; - + case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, - GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), + GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)), TSFlags, CurByte, OS, Fixups); - CurOp += X86AddrNumOperands + 1; + CurOp += X86::AddrNumOperands + 1; break; - + case X86II::MRMSrcReg: EmitByte(BaseOpcode, CurByte, OS); - EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), - CurByte, OS); - CurOp += 2; + SrcRegNum = CurOp + 1; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + + EmitRegModRMByte(MI.getOperand(SrcRegNum), + GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); + CurOp = SrcRegNum + 1; break; - + case X86II::MRMSrcMem: { + int AddrOperands = X86::AddrNumOperands; + unsigned FirstMemOp = CurOp+1; + if (HasVEX_4V) { + ++AddrOperands; + ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). + } + EmitByte(BaseOpcode, CurByte, OS); - // FIXME: Maybe lea should have its own form? This is a horrible hack. - int AddrOperands; - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - AddrOperands = X86AddrNumOperands - 1; // No segment register - else - AddrOperands = X86AddrNumOperands; - - EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), + EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)), TSFlags, CurByte, OS, Fixups); CurOp += AddrOperands + 1; break; @@ -584,6 +868,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). 
+ CurOp++; EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp++), (TSFlags & X86II::FormMask)-X86II::MRM0r, @@ -596,7 +882,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, TSFlags, CurByte, OS, Fixups); - CurOp += X86AddrNumOperands; + CurOp += X86::AddrNumOperands; break; case X86II::MRM_C1: EmitByte(BaseOpcode, CurByte, OS); @@ -639,14 +925,27 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(0xF9, CurByte, OS); break; } - + // If there is a remaining operand, it must be a trailing immediate. Emit it // according to the right size for the instruction. - if (CurOp != NumOps) - EmitImmediate(MI.getOperand(CurOp++), - X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), - CurByte, OS, Fixups); - + if (CurOp != NumOps) { + // The last source register of a 4 operand instruction in AVX is encoded + // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. + if (TSFlags & X86II::VEX_I8IMM) { + const MCOperand &MO = MI.getOperand(CurOp++); + bool IsExtReg = + X86InstrInfo::isX86_64ExtendedReg(MO.getReg()); + unsigned RegNum = (IsExtReg ? (1 << 7) : 0); + RegNum |= GetX86RegNum(MO) << 4; + EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, + Fixups); + } else + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + } + + #ifndef NDEBUG // FIXME: Verify. if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) { diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 98975ea01bae..5f31e00ebabd 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -127,21 +127,29 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: return RegNo-X86::ST0; - case X86::XMM0: case X86::XMM8: case X86::MM0: + case X86::XMM0: case X86::XMM8: + case X86::YMM0: case X86::YMM8: case X86::MM0: return 0; - case X86::XMM1: case X86::XMM9: case X86::MM1: + case X86::XMM1: case X86::XMM9: + case X86::YMM1: case X86::YMM9: case X86::MM1: return 1; - case X86::XMM2: case X86::XMM10: case X86::MM2: + case X86::XMM2: case X86::XMM10: + case X86::YMM2: case X86::YMM10: case X86::MM2: return 2; - case X86::XMM3: case X86::XMM11: case X86::MM3: + case X86::XMM3: case X86::XMM11: + case X86::YMM3: case X86::YMM11: case X86::MM3: return 3; - case X86::XMM4: case X86::XMM12: case X86::MM4: + case X86::XMM4: case X86::XMM12: + case X86::YMM4: case X86::YMM12: case X86::MM4: return 4; - case X86::XMM5: case X86::XMM13: case X86::MM5: + case X86::XMM5: case X86::XMM13: + case X86::YMM5: case X86::YMM13: case X86::MM5: return 5; - case X86::XMM6: case X86::XMM14: case X86::MM6: + case X86::XMM6: case X86::XMM14: + case X86::YMM6: case X86::YMM14: case X86::MM6: return 6; - case X86::XMM7: case X86::XMM15: case X86::MM7: + case X86::XMM7: case X86::XMM15: + case X86::YMM7: case X86::YMM15: case X86::MM7: return 7; case X86::ES: @@ -157,6 +165,34 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::GS: return 5; + case X86::CR0: + return 0; + case X86::CR1: + return 1; + case X86::CR2: + return 2; + case X86::CR3: + return 3; + case X86::CR4: + return 4; + + case X86::DR0: + return 0; + case X86::DR1: + return 1; + case X86::DR2: + return 2; + case X86::DR3: + return 3; + case X86::DR4: + return 4; + case X86::DR5: + return 5; + case X86::DR6: + return 6; + 
case X86::DR7: + return 7; + default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); @@ -357,56 +393,6 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } } -const TargetRegisterClass* const* -X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - bool callsEHReturn = false; - if (MF) - callsEHReturn = MF->getMMI().callsEHReturn(); - - static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = { - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = { - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, 0 - }; - - if (Is64Bit) { - if (IsWin64) - return CalleeSavedRegClassesWin64; - else - return (callsEHReturn ? - CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit); - } else { - return (callsEHReturn ? - CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit); - } -} - BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // Set the stack-pointer register and its aliases as reserved. @@ -696,8 +682,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // } // [EBP] MFI->CreateFixedObject(-TailCallReturnAddrDelta, - (-1U*SlotSize)+TailCallReturnAddrDelta, - true, false); + (-1U*SlotSize)+TailCallReturnAddrDelta, true); } if (hasFP(MF)) { @@ -710,7 +695,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, -(int)SlotSize + TFI.getOffsetOfLocalArea() + TailCallReturnAddrDelta, - true, false); + true); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; @@ -1240,8 +1225,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, if (CSSize) { unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; MachineInstr *MI = - addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); + addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), + FramePtr, false, -CSSize); MBB.insert(MBBI, MI); } else { BuildMI(MBB, MBBI, DL, @@ -1301,9 +1286,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, for (unsigned i = 0; i != 5; ++i) MIB.addOperand(MBBI->getOperand(i)); } else if (RetOpcode == X86::TCRETURNri64) { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). 
+ addReg(JumpTarget.getReg(), RegState::Kill); } else { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index d0b82e2f1ce1..d852bcd2011c 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -105,12 +105,6 @@ public: /// callee-save registers on this target. const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred - /// register classes to spill each callee-saved register with. The order and - /// length of this list match the getCalleeSavedRegs() list. - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - /// getReservedRegs - Returns a bitset indexed by physical register number /// indicating if a register is a special register that has particular uses and /// should be considered unavailable at all times, e.g. SP, RA. This is used by diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 91cfaa977eb5..9f0382e3fae9 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -147,7 +147,7 @@ let Namespace = "X86" in { def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>; def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>; def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>; - + // Pseudo Floating Point registers def FP0 : Register<"fp0">; def FP1 : Register<"fp1">; @@ -155,7 +155,7 @@ let Namespace = "X86" in { def FP3 : Register<"fp3">; def FP4 : Register<"fp4">; def FP5 : Register<"fp5">; - def FP6 : Register<"fp6">; + def FP6 : Register<"fp6">; // XMM Registers, used by the various SSE instruction set extensions. // The sub_ss and sub_sd subregs are the same registers with another regclass. @@ -357,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, }]; } -def GR32 : RegisterClass<"X86", [i32], 32, +def GR32 : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; @@ -412,7 +412,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, // GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since // RIP isn't really a register and it can't be used anywhere except in an // address, but it doesn't cause trouble. -def GR64 : RegisterClass<"X86", [i64], 64, +def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), @@ -446,7 +446,7 @@ def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> { } // Debug registers. -def DEBUG_REG : RegisterClass<"X86", [i32], 32, +def DEBUG_REG : RegisterClass<"X86", [i32], 32, [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> { } @@ -780,14 +780,14 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, } // Generic vector registers: VR64 and VR128. 
-def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64, v2f32], 64, +def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64, [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; - + let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -803,11 +803,27 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, } }]; } -def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, + +def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15]> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; + + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + VR256Class::iterator + VR256Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); + if (!Subtarget.is64Bit()) + return end()-8; // Only YMM0 to YMM7 are available in 32-bit mode. + else + return end(); + } + }]; } // Status flags registers. diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 09a26858eb7e..4a10be518f03 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -53,9 +53,12 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDLLImportLinkage()) return X86II::MO_DLLIMPORT; - // Materializable GVs (in JIT lazy compilation mode) do not require an - // extra load from stub. - bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); + // Determine whether this is a reference to a definition or a declaration. + // Materializable GVs (in JIT lazy compilation mode) do not require an extra + // load from stub. + bool isDecl = GV->hasAvailableExternallyLinkage(); + if (GV->isDeclaration() && !GV->isMaterializable()) + isDecl = true; // X86-64 in PIC mode. if (isPICStyleRIPRel()) { @@ -293,12 +296,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) - , DarwinVers(0) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) - , Is64Bit(is64Bit) - , TargetType(isELF) { // Default to ELF unless otherwise specified. + , TargetTriple(TT) + , Is64Bit(is64Bit) { // default to hard float ABI if (FloatABIType == FloatABI::Default) @@ -328,47 +330,40 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, HasCMov = true; } - DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Set the boolean corresponding to the current target triple, or the default - // if one cannot be determined, to true. - if (TT.length() > 5) { - size_t Pos; - if ((Pos = TT.find("-darwin")) != std::string::npos) { - TargetType = isDarwin; - - // Compute the darwin version number. - if (isdigit(TT[Pos+7])) - DarwinVers = atoi(&TT[Pos+7]); - else - DarwinVers = 8; // Minimum supported darwin is Tiger. 
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 09a26858eb7e..4a10be518f03 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -53,9 +53,12 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
   if (GV->hasDLLImportLinkage())
     return X86II::MO_DLLIMPORT;
 
-  // Materializable GVs (in JIT lazy compilation mode) do not require an
-  // extra load from stub.
-  bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+  // Determine whether this is a reference to a definition or a declaration.
+  // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+  // load from stub.
+  bool isDecl = GV->hasAvailableExternallyLinkage();
+  if (GV->isDeclaration() && !GV->isMaterializable())
+    isDecl = true;
 
   // X86-64 in PIC mode.
   if (isPICStyleRIPRel()) {
@@ -293,12 +296,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
   , IsBTMemSlow(false)
   , IsUAMemFast(false)
   , HasVectorUAMem(false)
-  , DarwinVers(0)
   , stackAlignment(8)
   // FIXME: this is a known good value for Yonah. How about others?
   , MaxInlineSizeThreshold(128)
-  , Is64Bit(is64Bit)
-  , TargetType(isELF) { // Default to ELF unless otherwise specified.
+  , TargetTriple(TT)
+  , Is64Bit(is64Bit) {
 
   // default to hard float ABI
   if (FloatABIType == FloatABI::Default)
@@ -328,47 +330,40 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
       HasCMov = true;
     }
 
-
   DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
                << ", 3DNowLevel " << X863DNowLevel
                << ", 64bit " << HasX86_64 << "\n");
   assert((!Is64Bit || HasX86_64) &&
          "64-bit code requested on a subtarget that doesn't support it!");
 
-  // Set the boolean corresponding to the current target triple, or the default
-  // if one cannot be determined, to true.
-  if (TT.length() > 5) {
-    size_t Pos;
-    if ((Pos = TT.find("-darwin")) != std::string::npos) {
-      TargetType = isDarwin;
-
-      // Compute the darwin version number.
-      if (isdigit(TT[Pos+7]))
-        DarwinVers = atoi(&TT[Pos+7]);
-      else
-        DarwinVers = 8;  // Minimum supported darwin is Tiger.
-    } else if (TT.find("linux") != std::string::npos) {
-      // Linux doesn't imply ELF, but we don't currently support anything else.
-      TargetType = isELF;
-    } else if (TT.find("cygwin") != std::string::npos) {
-      TargetType = isCygwin;
-    } else if (TT.find("mingw") != std::string::npos) {
-      TargetType = isMingw;
-    } else if (TT.find("win32") != std::string::npos) {
-      TargetType = isWindows;
-    } else if (TT.find("windows") != std::string::npos) {
-      TargetType = isWindows;
-    } else if (TT.find("-cl") != std::string::npos) {
-      TargetType = isDarwin;
-      DarwinVers = 9;
-    }
-  }
-
   // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
   // bit targets.
-  if (TargetType == isDarwin || Is64Bit)
+  if (isTargetDarwin() || Is64Bit)
     stackAlignment = 16;
 
   if (StackAlignment)
     stackAlignment = StackAlignment;
 }
+
+/// IsCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86Subtarget::IsCalleePop(bool IsVarArg,
+                               CallingConv::ID CallingConv) const {
+  if (IsVarArg)
+    return false;
+
+  switch (CallingConv) {
+  default:
+    return false;
+  case CallingConv::X86_StdCall:
+    return !is64Bit();
+  case CallingConv::X86_FastCall:
+    return !is64Bit();
+  case CallingConv::X86_ThisCall:
+    return !is64Bit();
+  case CallingConv::Fast:
+    return GuaranteedTailCallOpt;
+  case CallingConv::GHC:
+    return GuaranteedTailCallOpt;
+  }
+}
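
The new IsCalleePop() helper centralizes a callee-cleanup rule that call lowering needs in several places. How a caller might branch on it — illustrative only, not a hunk from this patch:

    // Decide who pops the argument area after the call.
    if (Subtarget->IsCalleePop(isVarArg, CallConv)) {
      // 32-bit stdcall/fastcall/thiscall, or fastcc/GHC under
      // -tailcallopt: emit a RET that also pops the argument bytes.
    } else {
      // The common case (cdecl and all 64-bit conventions): the caller
      // cleans up its own stack.
    }

Note that the three Microsoft conventions all return !is64Bit(): in 64-bit mode they collapse into the single Win64 convention, which behaves as caller-pop here.
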
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 646af91517fe..486dbc4e2e90 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,7 +14,9 @@
 #ifndef X86SUBTARGET_H
 #define X86SUBTARGET_H
 
+#include "llvm/ADT/Triple.h"
 #include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
 #include <string>
 
 namespace llvm {
@@ -88,10 +90,6 @@ protected:
   /// operands. This may require setting a feature bit in the processor.
   bool HasVectorUAMem;
 
-  /// DarwinVers - Nonzero if this is a darwin platform: the numeric
-  /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
-  unsigned char DarwinVers; // Is any darwin-x86 platform.
-
   /// stackAlignment - The minimum alignment known to hold of the stack frame on
   /// entry to the function and which must be maintained by every function.
   unsigned stackAlignment;
@@ -99,6 +97,9 @@ protected:
   /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
   ///
   unsigned MaxInlineSizeThreshold;
+
+  /// TargetTriple - What processor and OS we're targeting.
+  Triple TargetTriple;
 
 private:
   /// Is64Bit - True if the processor supports 64-bit instructions and
@@ -106,9 +107,6 @@ private:
   bool Is64Bit;
 
 public:
-  enum {
-    isELF, isCygwin, isDarwin, isWindows, isMingw
-  } TargetType;
 
   /// This constructor initializes the data members to match that
   /// of the specified triple.
@@ -157,24 +155,31 @@ public:
   bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool hasVectorUAMem() const { return HasVectorUAMem; }
 
-  bool isTargetDarwin() const { return TargetType == isDarwin; }
-  bool isTargetELF() const { return TargetType == isELF; }
+  bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+
+  // ELF is a reasonably sane default and the only other X86 targets we
+  // support are Darwin and Windows. Just use "not those".
+  bool isTargetELF() const {
+    return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
+  }
+  bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
 
-  bool isTargetWindows() const { return TargetType == isWindows; }
-  bool isTargetMingw() const { return TargetType == isMingw; }
-  bool isTargetCygwin() const { return TargetType == isCygwin; }
+  bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
+  bool isTargetMingw() const {
+    return TargetTriple.getOS() == Triple::MinGW32 ||
+           TargetTriple.getOS() == Triple::MinGW64; }
+  bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
 
   bool isTargetCygMing() const {
-    return TargetType == isMingw || TargetType == isCygwin;
+    return isTargetMingw() || isTargetCygwin();
   }
-  
+
   /// isTargetCOFF - Return true if this is any COFF/Windows target variant.
   bool isTargetCOFF() const {
-    return TargetType == isMingw || TargetType == isCygwin ||
-           TargetType == isWindows;
+    return isTargetMingw() || isTargetCygwin() || isTargetWindows();
   }
 
   bool isTargetWin64() const {
-    return Is64Bit && (TargetType == isMingw || TargetType == isWindows);
+    return Is64Bit && (isTargetMingw() || isTargetWindows());
  }
 
   std::string getDataLayout() const {
@@ -208,7 +213,10 @@ public:
   /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
   /// 10 = Snow Leopard, etc.
-  unsigned getDarwinVers() const { return DarwinVers; }
+  unsigned getDarwinVers() const {
+    if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber();
+    return 0;
+  }
 
   /// ClassifyGlobalReference - Classify a global variable reference for the
   /// current subtarget according to how we should reference it in a non-pcrel
@@ -237,6 +245,9 @@ public:
   /// indicating the number of scheduling cycles of backscheduling that
   /// should be attempted.
   unsigned getSpecialAddressLatency() const;
+
+  /// IsCalleePop - Test whether a function should pop its own arguments.
+  bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
 };
 
 } // End llvm namespace
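
With DarwinVers and the ad-hoc TargetType enum gone, every OS question is answered directly from the stored llvm::Triple. A minimal sketch of the idea, using only Triple methods that appear in this patch (the triple string and the switch are illustrative):

    // Classify an OS the way the new predicates do.
    Triple T("x86_64-apple-darwin10");   // hypothetical triple string
    switch (T.getOS()) {
    case Triple::Darwin:   // isTargetDarwin(); getDarwinMajorNumber() == 10
      break;
    case Triple::Cygwin:   // isTargetCygwin(), hence isTargetCygMing()
    case Triple::MinGW32:
    case Triple::MinGW64:  // isTargetMingw(), hence isTargetCygMing()
    case Triple::Win32:    // isTargetWindows(); together these are COFF
      break;
    default:
      break;               // everything else is treated as ELF
    }
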
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f2c50583c95f..df00d3ffcc79 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -173,14 +173,18 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
   // Install an instruction selector.
   PM.add(createX86ISelDag(*this, OptLevel));
 
-  // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
-  PM.add(createX87FPRegKillInserterPass());
+  // For 32-bit, prepend instructions to set the "global base reg" for PIC.
+  if (!Subtarget.is64Bit())
+    PM.add(createGlobalBaseRegPass());
 
   return false;
 }
 
 bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel) {
+  // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
+  PM.add(createX87FPRegKillInserterPass());
+
   PM.add(createX86MaxStackAlignmentHeuristicPass());
   return false;  // -print-machineinstr shouldn't print after this.
 }
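
Two scheduling details are worth noting in that hunk: the x87 FP_REG_KILL inserter now runs from addPreRegAlloc() instead of piggybacking on instruction selection, and the 32-bit-only PIC global-base-register pass is created in its place. Since the common CodeGen driver invokes addInstSelector() before addPreRegAlloc(), FP_REG_KILL insertion still happens after selection, just at a better-defined point. A compressed sketch of the hook order (the hook names are real; the comments state the assumption being illustrated):

    // Invoked by the target-independent pipeline, in this order:
    addInstSelector(PM, OptLevel);   // DAG isel, then PIC base setup (32-bit)
    addPreRegAlloc(PM, OptLevel);    // FP_REG_KILL, stack-alignment heuristic
    // ... register allocation and later phases follow ...
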
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index c100c590135e..6656bdc10eae 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -138,7 +138,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
     // FALL THROUGH
   case GlobalValue::InternalLinkage:
   case GlobalValue::PrivateLinkage:
-  case GlobalValue::LinkerPrivateLinkage:
     break;
   case GlobalValue::DLLImportLinkage:
     llvm_unreachable("DLLImport linkage is not supported by this target!");
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index b23057226c99..abe7b2fd42be 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -245,7 +245,7 @@ SDValue XCoreTargetLowering::
 LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
 {
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32);
   // If it's a debug information descriptor, don't mess with it.
   if (DAG.isVerifiedDebugInfoDesc(Op))
     return GA;
@@ -269,7 +269,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
   DebugLoc dl = Op.getDebugLoc();
   // transform to label + getid() * size
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
   const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
   if (!GVar) {
     // If GV is an alias then use the aliasee to determine size
@@ -454,12 +454,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const
   if (LD->getAlignment() == 2) {
     int SVOffset = LD->getSrcValueOffset();
-    SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+    SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain,
                                  BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
                                  LD->isVolatile(), LD->isNonTemporal(), 2);
     SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
                                    DAG.getConstant(2, MVT::i32));
-    SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
+    SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain,
                                   HighAddr, LD->getSrcValue(), SVOffset + 2,
                                   MVT::i16, LD->isVolatile(),
                                   LD->isNonTemporal(), 2);
@@ -812,6 +812,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                CallingConv::ID CallConv, bool isVarArg,
                                bool &isTailCall,
                                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                               const SmallVectorImpl<SDValue> &OutVals,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
                                DebugLoc dl, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &InVals) const {
@@ -826,7 +827,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     case CallingConv::Fast:
     case CallingConv::C:
       return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
-                            Outs, Ins, dl, DAG, InVals);
+                            Outs, OutVals, Ins, dl, DAG, InVals);
   }
 }
 
@@ -839,6 +840,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
                                     CallingConv::ID CallConv, bool isVarArg,
                                     bool isTailCall,
                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                     DebugLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
@@ -866,7 +868,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
   // Walk the register/memloc assignments, inserting copies/loads.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    SDValue Arg = Outs[i].Val;
+    SDValue Arg = OutVals[i];
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
@@ -919,7 +921,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
   // Likewise ExternalSymbol -> TargetExternalSymbol.
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
   else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
 
@@ -1072,7 +1074,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
         // Create the frame index object for this incoming parameter...
         int FI = MFI->CreateFixedObject(ObjSize,
                                         LRSaveSize + VA.getLocMemOffset(),
-                                        true, false);
+                                        true);
 
         // Create the SelectionDAG nodes corresponding to a load
         //from this parameter
@@ -1097,7 +1099,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
       // address
       for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
         // Create a stack slot
-        int FI = MFI->CreateFixedObject(4, offset, true, false);
+        int FI = MFI->CreateFixedObject(4, offset, true);
         if (i == FirstVAReg) {
           XFI->setVarArgsFrameIndex(FI);
         }
@@ -1120,7 +1122,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
       // This will point to the next argument passed via stack.
       XFI->setVarArgsFrameIndex(
         MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
-                               true, false));
+                               true));
     }
   }
 
@@ -1133,19 +1135,19 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
 
 bool XCoreTargetLowering::
 CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
-               const SmallVectorImpl<EVT> &OutTys,
-               const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
-               SelectionDAG &DAG) const {
+               const SmallVectorImpl<ISD::OutputArg> &Outs,
+               LLVMContext &Context) const {
   SmallVector<CCValAssign, 16> RVLocs;
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
-                 RVLocs, *DAG.getContext());
-  return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_XCore);
+                 RVLocs, Context);
+  return CCInfo.CheckReturn(Outs, RetCC_XCore);
 }
 
 SDValue
 XCoreTargetLowering::LowerReturn(SDValue Chain,
                                  CallingConv::ID CallConv, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                 const SmallVectorImpl<SDValue> &OutVals,
                                  DebugLoc dl, SelectionDAG &DAG) const {
 
   // CCValAssign - represent the assignment of
@@ -1175,7 +1177,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
     assert(VA.isRegLoc() && "Can only return in registers!");
 
     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                             Outs[i].Val, Flag);
+                             OutVals[i], Flag);
 
     // guarantee that all emitted copies are
     // stuck together, avoiding something bad
@@ -1221,23 +1223,22 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   MachineFunction *F = BB->getParent();
   MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
-  BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
-    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
   F->insert(It, copy0MBB);
   F->insert(It, sinkMBB);
-  // Update machine-CFG edges by first adding all successors of the current
-  // block to the new block which will contain the Phi node for the select.
-  for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
-       E = BB->succ_end(); I != E; ++I)
-    sinkMBB->addSuccessor(*I);
-  // Next, remove all successors of the current block, and add the true
-  // and fallthrough blocks as its successors.
-  while (!BB->succ_empty())
-    BB->removeSuccessor(BB->succ_begin());
+
+  // Transfer the remainder of BB and its successor edges to sinkMBB.
+  sinkMBB->splice(sinkMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
   // Next, add the true and fallthrough blocks as its successors.
   BB->addSuccessor(copy0MBB);
   BB->addSuccessor(sinkMBB);
 
+  BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
+    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
   // copy0MBB:
   //  %FalseValue = ...
   //  # fallthrough to sinkMBB
@@ -1250,11 +1251,12 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   //  ...
   BB = sinkMBB;
-  BuildMI(BB, dl, TII.get(XCore::PHI), MI->getOperand(0).getReg())
+  BuildMI(*BB, BB->begin(), dl,
+          TII.get(XCore::PHI), MI->getOperand(0).getReg())
     .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
     .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
 
-  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  MI->eraseFromParent();       // The pseudo instruction is gone now.
   return BB;
 }
 
@@ -1379,7 +1381,6 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
     SDValue Mul0, Mul1, Addend0, Addend1;
     if (N->getValueType(0) == MVT::i32 &&
         isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
-      SDValue Zero = DAG.getConstant(0, MVT::i32);
       SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
                                     DAG.getVTList(MVT::i32, MVT::i32), Mul0,
                                     Mul1, Addend0, Addend1);
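
The rewritten SELECT_CC expansion above is the standard "diamond" shape for EmitInstrWithCustomInserter. The key ordering constraint, which the old hand-rolled successor copying did not handle (it never updated PHIs), is that everything after the pseudo must be spliced into the sink block before any new terminators are added:

    //        thisMBB
    //        /     \
    //   copy0MBB    |      copy0MBB supplies %FalseValue; thisMBB
    //        \     /       branches over it when the condition holds.
    //        sinkMBB
    //
    // Move the tail of thisMBB (everything past MI) into sinkMBB and let
    // it inherit thisMBB's successors; PHIs in those successors are
    // rewritten automatically.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

Only after this does the block receive its two new successors and the BRFT_lru6 branch, so the branch can never be swept into sinkMBB by the splice.
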
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index d8d2a3aa7315..febc198f4faf 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -120,6 +120,7 @@ namespace llvm {
                       CallingConv::ID CallConv, bool isVarArg,
                       bool isTailCall,
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
+                      const SmallVectorImpl<SDValue> &OutVals,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       DebugLoc dl, SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const;
@@ -178,6 +179,7 @@ namespace llvm {
                 CallingConv::ID CallConv, bool isVarArg,
                 bool &isTailCall,
                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
                 const SmallVectorImpl<ISD::InputArg> &Ins,
                 DebugLoc dl, SelectionDAG &DAG,
                 SmallVectorImpl<SDValue> &InVals) const;
@@ -186,13 +188,13 @@ namespace llvm {
       LowerReturn(SDValue Chain,
                   CallingConv::ID CallConv, bool isVarArg,
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
+                  const SmallVectorImpl<SDValue> &OutVals,
                   DebugLoc dl, SelectionDAG &DAG) const;
 
       virtual bool
         CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
-                       const SmallVectorImpl<EVT> &OutTys,
-                       const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
-                       SelectionDAG &DAG) const;
+                       const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+                       LLVMContext &Context) const;
   };
 }
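
Across this patch LowerCall and LowerReturn grow an OutVals parameter: the ISD::OutputArg entries now carry only type and flag information, while the actual SDValues travel in a parallel OutVals vector (compare "Outs[i].Val" becoming "OutVals[i]" in the .cpp changes above). An illustrative loop over the paired vectors, not code from the commit:

    // Outs[i] and OutVals[i] describe the same outgoing argument.
    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
      ISD::ArgFlagsTy Flags = Outs[i].Flags;  // sext/zext/byval etc.
      SDValue Arg = OutVals[i];               // the value being passed
      // ...place Arg in a register or stack slot per the CC assignment...
    }
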
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 5260258d6b7e..dd90ea976770 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -299,9 +299,8 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
 unsigned
 XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
                              MachineBasicBlock *FBB,
-                             const SmallVectorImpl<MachineOperand> &Cond)const{
-  // FIXME there should probably be a DebugLoc argument here
-  DebugLoc dl;
+                             const SmallVectorImpl<MachineOperand> &Cond,
+                             DebugLoc DL)const{
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 2 || Cond.size() == 0) &&
@@ -310,11 +309,11 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
   if (FBB == 0) { // One way branch.
     if (Cond.empty()) {
       // Unconditional branch
-      BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(TBB);
+      BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB);
     } else {
       // Conditional branch.
       unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
-      BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+      BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
                              .addMBB(TBB);
     }
     return 1;
@@ -323,9 +322,9 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
   // Two-way Conditional branch.
   assert(Cond.size() == 2 && "Unexpected number of components!");
   unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
-  BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+  BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
                          .addMBB(TBB);
-  BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(FBB);
+  BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(FBB);
   return 2;
 }
 
@@ -357,37 +356,31 @@ XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   return 2;
 }
 
-bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
-                                  MachineBasicBlock::iterator I,
-                                  unsigned DestReg, unsigned SrcReg,
-                                  const TargetRegisterClass *DestRC,
-                                  const TargetRegisterClass *SrcRC,
-                                  DebugLoc DL) const {
-
-  if (DestRC == SrcRC) {
-    if (DestRC == XCore::GRRegsRegisterClass) {
-      BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
-        .addReg(SrcReg)
-        .addImm(0);
-      return true;
-    } else {
-      return false;
-    }
+void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator I, DebugLoc DL,
+                                 unsigned DestReg, unsigned SrcReg,
+                                 bool KillSrc) const {
+  bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
+  bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg);
+
+  if (GRDest && GRSrc) {
+    BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc))
+      .addImm(0);
+    return;
   }
-
-  if (SrcRC == XCore::RRegsRegisterClass && SrcReg == XCore::SP &&
-    DestRC == XCore::GRRegsRegisterClass) {
-    BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg)
-      .addImm(0);
-    return true;
+
+  if (GRDest && SrcReg == XCore::SP) {
+    BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
+    return;
   }
-  if (DestRC == XCore::RRegsRegisterClass && DestReg == XCore::SP &&
-      SrcRC == XCore::GRRegsRegisterClass) {
+
+  if (DestReg == XCore::SP && GRSrc) {
     BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
-      .addReg(SrcReg);
-    return true;
+      .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
   }
-  return false;
+  llvm_unreachable("Impossible reg-to-reg copy");
 }
 
 void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -438,8 +431,10 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(it->getReg());
 
-    storeRegToStackSlot(MBB, MI, it->getReg(), true,
-                        it->getFrameIdx(), it->getRegClass(), &RI);
+    unsigned Reg = it->getReg();
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    storeRegToStackSlot(MBB, MI, Reg, true,
+                        it->getFrameIdx(), RC, &RI);
     if (emitFrameMoves) {
       MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
       BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
@@ -460,10 +455,11 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     --BeforeI;
   for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
                                                     it != CSI.end(); ++it) {
-
+    unsigned Reg = it->getReg();
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
     loadRegFromStackSlot(MBB, MI, it->getReg(),
                          it->getFrameIdx(),
-                         it->getRegClass(), &RI);
+                         RC, &RI);
     assert(MI != MBB.begin() &&
            "loadRegFromStackSlot didn't insert any code!");
     // Insert in reverse order.  loadRegFromStackSlot can insert multiple
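
copyRegToReg returned bool and required the caller to supply both register classes; copyPhysReg is keyed off the physical registers alone and must succeed, which is why the unhandled fall-through becomes llvm_unreachable. The call-site simplification, sketched with illustrative names (the old call is shown as a comment for contrast):

    // Before: could fail, and needed DestRC/SrcRC from the caller.
    //   if (!TII.copyRegToReg(MBB, I, DstReg, SrcReg, DstRC, SrcRC, DL))
    //     report_fatal_error("copy not handled");
    // After: infallible by contract, and the kill state travels with it.
    TII.copyPhysReg(MBB, I, DL, DstReg, SrcReg, /*KillSrc=*/true);
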
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 9035ea90c9bd..e5b0171579fc 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -58,17 +58,16 @@ public:
                              bool AllowModify) const;
 
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                                MachineBasicBlock *FBB,
-                                const SmallVectorImpl<MachineOperand> &Cond) const;
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                DebugLoc DL) const;
 
   virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
 
-  virtual bool copyRegToReg(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator I,
-                            unsigned DestReg, unsigned SrcReg,
-                            const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC,
-                            DebugLoc DL) const;
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I, DebugLoc DL,
+                           unsigned DestReg, unsigned SrcReg,
+                           bool KillSrc) const;
 
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index dd3cbc169908..19b9b1f8c00c 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -733,7 +733,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
 // TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out,
 // in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp,
 // tsetmr, sext (reg), zext (reg)
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let neverHasSideEffects = 1 in
 def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
                      "sext $dst, $src2",
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 0cfb358617e8..2a88342180e4 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -82,18 +82,6 @@ const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
   return CalleeSavedRegs;
 }
 
-const TargetRegisterClass* const*
-XCoreRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
-  static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
-    XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
-    XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
-    XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
-    XCore::GRRegsRegisterClass, XCore::RRegsRegisterClass,
-    0
-  };
-  return CalleeSavedRegClasses;
-}
-
 BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   Reserved.set(XCore::CP);
@@ -320,7 +308,7 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
     int FrameIdx;
     if (! isVarArg) {
       // A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
-      FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true, false);
+      FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
     } else {
       FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
                                         false);
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 5bdd05922598..66132ba8ff66 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -44,9 +44,6 @@ public:
   const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
-  const TargetRegisterClass* const* getCalleeSavedRegClasses(
-    const MachineFunction *MF = 0) const;
-
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
   bool requiresRegisterScavenging(const MachineFunction &MF) const;