diff options
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARMAsmPrinter.cpp | 1 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMBaseRegisterInfo.cpp | 19 | ||||
-rw-r--r-- | lib/Target/ARM/ARMCallLowering.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 20 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstructionSelector.cpp | 312 | ||||
-rw-r--r-- | lib/Target/ARM/ARMLegalizerInfo.cpp | 207 | ||||
-rw-r--r-- | lib/Target/ARM/ARMLegalizerInfo.h | 33 | ||||
-rw-r--r-- | lib/Target/ARM/ARMRegisterBankInfo.cpp | 32 | ||||
-rw-r--r-- | lib/Target/ARM/ARMTargetTransformInfo.cpp | 18 | ||||
-rw-r--r-- | lib/Target/ARM/ARMTargetTransformInfo.h | 36 | ||||
-rw-r--r-- | lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 11 | ||||
-rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 15 | ||||
-rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 8 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb1FrameLowering.cpp | 2 |
17 files changed, 596 insertions, 128 deletions
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 90f635c812542..582153daebde9 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1103,6 +1103,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { case ARM::tPUSH: // Special case here: no src & dst reg, but two extra imp ops. StartOp = 2; NumOffset = 2; + LLVM_FALLTHROUGH; case ARM::STMDB_UPD: case ARM::t2STMDB_UPD: case ARM::VSTMDDB_UPD: diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 1ec6b24b2ed67..3cf5950a1918d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1880,6 +1880,9 @@ isProfitableToIfCvt(MachineBasicBlock &TBB, // Diamond: TBB is the block that is branched to, FBB is the fallthrough TUnpredCycles = TCycles + TakenBranchCost; FUnpredCycles = FCycles + NotTakenBranchCost; + // The branch at the end of FBB will disappear when it's predicated, so + // discount it from PredCost. + PredCost -= 1 * ScalingUpFactor; } // The total cost is the cost of each path scaled by their probabilites unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b4fb292c0116d..e97a7ce5067f9 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -193,10 +193,11 @@ getReservedRegs(const MachineFunction &MF) const { for (unsigned R = 0; R < 16; ++R) markSuperRegs(Reserved, ARM::D16 + R); } - const TargetRegisterClass *RC = &ARM::GPRPairRegClass; - for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I) - for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI) - if (Reserved.test(*SI)) markSuperRegs(Reserved, *I); + const TargetRegisterClass &RC = ARM::GPRPairRegClass; + for (unsigned Reg : RC) + for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI) + if (Reserved.test(*SI)) + markSuperRegs(Reserved, Reg); assert(checkAllSuperRegsMarked(Reserved)); return Reserved; @@ -315,8 +316,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, Hints.push_back(PairedPhys); // Then prefer even or odd registers. - for (unsigned I = 0, E = Order.size(); I != E; ++I) { - unsigned Reg = Order[I]; + for (unsigned Reg : Order) { if (Reg == PairedPhys || (getEncodingValue(Reg) & 1) != Odd) continue; // Don't provide hints that are paired to a reserved register. @@ -659,11 +659,8 @@ bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, unsigned Ba const MCInstrDesc &Desc = MI->getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); unsigned i = 0; - - while (!MI->getOperand(i).isFI()) { - ++i; - assert(i < MI->getNumOperands() &&"Instr doesn't have FrameIndex operand!"); - } + for (; !MI->getOperand(i).isFI(); ++i) + assert(i+1 < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!"); // AddrMode4 and AddrMode6 cannot handle any offset. if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6) diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index e498f70b820db..051827a6a6a2f 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -321,7 +321,7 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size"); assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size"); - // The necesary extensions are handled on the other side of the ABI + // The necessary extensions are handled on the other side of the ABI // boundary. markPhysRegUsed(PhysReg); MIRBuilder.buildCopy(ValVReg, PhysReg); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index e42514acd76f0..6ba7593543a92 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -3398,9 +3398,9 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { SDLoc dl(Op); - ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2)); - auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue()); - if (Scope == SynchronizationScope::SingleThread) + ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2)); + auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue()); + if (SSID == SyncScope::SingleThread) return Op; if (!Subtarget->hasDataBarrier()) { @@ -5356,15 +5356,15 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { // Integer comparisons. switch (SetCCOpcode) { default: llvm_unreachable("Illegal integer comparison"); - case ISD::SETNE: Invert = true; + case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH; case ISD::SETEQ: Opc = ARMISD::VCEQ; break; - case ISD::SETLT: Swap = true; + case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGT: Opc = ARMISD::VCGT; break; - case ISD::SETLE: Swap = true; + case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETGE: Opc = ARMISD::VCGE; break; - case ISD::SETULT: Swap = true; + case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGT: Opc = ARMISD::VCGTU; break; - case ISD::SETULE: Swap = true; + case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; case ISD::SETUGE: Opc = ARMISD::VCGEU; break; } @@ -13779,7 +13779,9 @@ bool ARMTargetLowering::lowerInterleavedLoad( // Convert the integer vector to pointer vector if the element is pointer. if (EltTy->isPointerTy()) - SubVec = Builder.CreateIntToPtr(SubVec, SV->getType()); + SubVec = Builder.CreateIntToPtr( + SubVec, VectorType::get(SV->getType()->getVectorElementType(), + VecTy->getVectorNumElements())); SubVecs[SV].push_back(SubVec); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 5044134f5b1e2..f05b142552369 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -510,7 +510,8 @@ class InstrItineraryData; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; - bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const override { + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, + const SelectionDAG &DAG) const override { // Do not merge to larger than i32. return (MemVT.getSizeInBits() <= 32); } diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 53db5acbe805c..42eac12e457b2 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4799,7 +4799,7 @@ def : t2InstAlias<"add${p} $Rd, pc, $imm", // Pseudo instruction ldr Rt, =immediate def t2LDRConstPool : t2AsmPseudo<"ldr${p} $Rt, $immediate", - (ins GPRnopc:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; + (ins GPR:$Rt, const_pool_asm_imm:$immediate, pred:$p)>; // Version w/ the .w suffix. def : t2InstAlias<"ldr${p}.w $Rt, $immediate", (t2LDRConstPool GPRnopc:$Rt, diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 374176d1d7371..29ef69ad0010f 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -20,6 +20,8 @@ #define DEBUG_TYPE "arm-isel" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" + using namespace llvm; #ifndef LLVM_BUILD_GLOBAL_ISEL @@ -42,13 +44,32 @@ public: private: bool selectImpl(MachineInstr &I) const; - bool selectICmp(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; + struct CmpConstants; + struct InsertInfo; + + bool selectCmp(CmpConstants Helper, MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const; + + // Helper for inserting a comparison sequence that sets \p ResReg to either 1 + // if \p LHSReg and \p RHSReg are in the relationship defined by \p Cond, or + // \p PrevRes otherwise. In essence, it computes PrevRes OR (LHS Cond RHS). + bool insertComparison(CmpConstants Helper, InsertInfo I, unsigned ResReg, + ARMCC::CondCodes Cond, unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const; + + // Set \p DestReg to \p Constant. + void putConstant(InsertInfo I, unsigned DestReg, unsigned Constant) const; + + bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const; + + // Check if the types match and both operands have the expected size and + // register bank. + bool validOpRegPair(MachineRegisterInfo &MRI, unsigned LHS, unsigned RHS, + unsigned ExpectedSize, unsigned ExpectedRegBankID) const; - bool selectSelect(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const; + // Check if the register has the expected size and register bank. + bool validReg(MachineRegisterInfo &MRI, unsigned Reg, unsigned ExpectedSize, + unsigned ExpectedRegBankID) const; const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; @@ -251,120 +272,233 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank, return Opc; } -static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { +// When lowering comparisons, we sometimes need to perform two compares instead +// of just one. Get the condition codes for both comparisons. If only one is +// needed, the second member of the pair is ARMCC::AL. +static std::pair<ARMCC::CondCodes, ARMCC::CondCodes> +getComparePreds(CmpInst::Predicate Pred) { + std::pair<ARMCC::CondCodes, ARMCC::CondCodes> Preds = {ARMCC::AL, ARMCC::AL}; switch (Pred) { - // Needs two compares... case CmpInst::FCMP_ONE: + Preds = {ARMCC::GT, ARMCC::MI}; + break; case CmpInst::FCMP_UEQ: - default: - // AL is our "false" for now. The other two need more compares. - return ARMCC::AL; + Preds = {ARMCC::EQ, ARMCC::VS}; + break; case CmpInst::ICMP_EQ: case CmpInst::FCMP_OEQ: - return ARMCC::EQ; + Preds.first = ARMCC::EQ; + break; case CmpInst::ICMP_SGT: case CmpInst::FCMP_OGT: - return ARMCC::GT; + Preds.first = ARMCC::GT; + break; case CmpInst::ICMP_SGE: case CmpInst::FCMP_OGE: - return ARMCC::GE; + Preds.first = ARMCC::GE; + break; case CmpInst::ICMP_UGT: case CmpInst::FCMP_UGT: - return ARMCC::HI; + Preds.first = ARMCC::HI; + break; case CmpInst::FCMP_OLT: - return ARMCC::MI; + Preds.first = ARMCC::MI; + break; case CmpInst::ICMP_ULE: case CmpInst::FCMP_OLE: - return ARMCC::LS; + Preds.first = ARMCC::LS; + break; case CmpInst::FCMP_ORD: - return ARMCC::VC; + Preds.first = ARMCC::VC; + break; case CmpInst::FCMP_UNO: - return ARMCC::VS; + Preds.first = ARMCC::VS; + break; case CmpInst::FCMP_UGE: - return ARMCC::PL; + Preds.first = ARMCC::PL; + break; case CmpInst::ICMP_SLT: case CmpInst::FCMP_ULT: - return ARMCC::LT; + Preds.first = ARMCC::LT; + break; case CmpInst::ICMP_SLE: case CmpInst::FCMP_ULE: - return ARMCC::LE; + Preds.first = ARMCC::LE; + break; case CmpInst::FCMP_UNE: case CmpInst::ICMP_NE: - return ARMCC::NE; + Preds.first = ARMCC::NE; + break; case CmpInst::ICMP_UGE: - return ARMCC::HS; + Preds.first = ARMCC::HS; + break; case CmpInst::ICMP_ULT: - return ARMCC::LO; + Preds.first = ARMCC::LO; + break; + default: + break; } + assert(Preds.first != ARMCC::AL && "No comparisons needed?"); + return Preds; } -bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { - auto &MBB = *MIB->getParent(); - auto InsertBefore = std::next(MIB->getIterator()); - auto &DebugLoc = MIB->getDebugLoc(); - - // Move 0 into the result register. - auto Mov0I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVi)) - .addDef(MRI.createVirtualRegister(&ARM::GPRRegClass)) - .addImm(0) - .add(predOps(ARMCC::AL)) - .add(condCodeOp()); - if (!constrainSelectedInstRegOperands(*Mov0I, TII, TRI, RBI)) +struct ARMInstructionSelector::CmpConstants { + CmpConstants(unsigned CmpOpcode, unsigned FlagsOpcode, unsigned OpRegBank, + unsigned OpSize) + : ComparisonOpcode(CmpOpcode), ReadFlagsOpcode(FlagsOpcode), + OperandRegBankID(OpRegBank), OperandSize(OpSize) {} + + // The opcode used for performing the comparison. + const unsigned ComparisonOpcode; + + // The opcode used for reading the flags set by the comparison. May be + // ARM::INSTRUCTION_LIST_END if we don't need to read the flags. + const unsigned ReadFlagsOpcode; + + // The assumed register bank ID for the operands. + const unsigned OperandRegBankID; + + // The assumed size in bits for the operands. + const unsigned OperandSize; +}; + +struct ARMInstructionSelector::InsertInfo { + InsertInfo(MachineInstrBuilder &MIB) + : MBB(*MIB->getParent()), InsertBefore(std::next(MIB->getIterator())), + DbgLoc(MIB->getDebugLoc()) {} + + MachineBasicBlock &MBB; + const MachineBasicBlock::instr_iterator InsertBefore; + const DebugLoc &DbgLoc; +}; + +void ARMInstructionSelector::putConstant(InsertInfo I, unsigned DestReg, + unsigned Constant) const { + (void)BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVi)) + .addDef(DestReg) + .addImm(Constant) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); +} + +bool ARMInstructionSelector::validOpRegPair(MachineRegisterInfo &MRI, + unsigned LHSReg, unsigned RHSReg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + return MRI.getType(LHSReg) == MRI.getType(RHSReg) && + validReg(MRI, LHSReg, ExpectedSize, ExpectedRegBankID) && + validReg(MRI, RHSReg, ExpectedSize, ExpectedRegBankID); +} + +bool ARMInstructionSelector::validReg(MachineRegisterInfo &MRI, unsigned Reg, + unsigned ExpectedSize, + unsigned ExpectedRegBankID) const { + if (MRI.getType(Reg).getSizeInBits() != ExpectedSize) { + DEBUG(dbgs() << "Unexpected size for register"); return false; + } - // Perform the comparison. - auto LHSReg = MIB->getOperand(2).getReg(); - auto RHSReg = MIB->getOperand(3).getReg(); - assert(MRI.getType(LHSReg) == MRI.getType(RHSReg) && - MRI.getType(LHSReg).getSizeInBits() == 32 && - MRI.getType(RHSReg).getSizeInBits() == 32 && - "Unsupported types for comparison operation"); - auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPrr)) - .addUse(LHSReg) - .addUse(RHSReg) - .add(predOps(ARMCC::AL)); - if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + if (RBI.getRegBank(Reg, MRI, TRI)->getID() != ExpectedRegBankID) { + DEBUG(dbgs() << "Unexpected register bank for register"); return false; + } + + return true; +} + +bool ARMInstructionSelector::selectCmp(CmpConstants Helper, + MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const { + const InsertInfo I(MIB); - // Move 1 into the result register if the flags say so. auto ResReg = MIB->getOperand(0).getReg(); + if (!validReg(MRI, ResReg, 1, ARM::GPRRegBankID)) + return false; + auto Cond = static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate()); - auto ARMCond = getComparePred(Cond); - if (ARMCond == ARMCC::AL) + if (Cond == CmpInst::FCMP_TRUE || Cond == CmpInst::FCMP_FALSE) { + putConstant(I, ResReg, Cond == CmpInst::FCMP_TRUE ? 1 : 0); + MIB->eraseFromParent(); + return true; + } + + auto LHSReg = MIB->getOperand(2).getReg(); + auto RHSReg = MIB->getOperand(3).getReg(); + if (!validOpRegPair(MRI, LHSReg, RHSReg, Helper.OperandSize, + Helper.OperandRegBankID)) return false; - auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCi)) + auto ARMConds = getComparePreds(Cond); + auto ZeroReg = MRI.createVirtualRegister(&ARM::GPRRegClass); + putConstant(I, ZeroReg, 0); + + if (ARMConds.second == ARMCC::AL) { + // Simple case, we only need one comparison and we're done. + if (!insertComparison(Helper, I, ResReg, ARMConds.first, LHSReg, RHSReg, + ZeroReg)) + return false; + } else { + // Not so simple, we need two successive comparisons. + auto IntermediateRes = MRI.createVirtualRegister(&ARM::GPRRegClass); + if (!insertComparison(Helper, I, IntermediateRes, ARMConds.first, LHSReg, + RHSReg, ZeroReg)) + return false; + if (!insertComparison(Helper, I, ResReg, ARMConds.second, LHSReg, RHSReg, + IntermediateRes)) + return false; + } + + MIB->eraseFromParent(); + return true; +} + +bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I, + unsigned ResReg, + ARMCC::CondCodes Cond, + unsigned LHSReg, unsigned RHSReg, + unsigned PrevRes) const { + // Perform the comparison. + auto CmpI = + BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(Helper.ComparisonOpcode)) + .addUse(LHSReg) + .addUse(RHSReg) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Read the comparison flags (if necessary). + if (Helper.ReadFlagsOpcode != ARM::INSTRUCTION_LIST_END) { + auto ReadI = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, + TII.get(Helper.ReadFlagsOpcode)) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*ReadI, TII, TRI, RBI)) + return false; + } + + // Select either 1 or the previous result based on the value of the flags. + auto Mov1I = BuildMI(I.MBB, I.InsertBefore, I.DbgLoc, TII.get(ARM::MOVCCi)) .addDef(ResReg) - .addUse(Mov0I->getOperand(0).getReg()) + .addUse(PrevRes) .addImm(1) - .add(predOps(ARMCond, ARM::CPSR)); + .add(predOps(Cond, ARM::CPSR)); if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) return false; - MIB->eraseFromParent(); return true; } bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, - const ARMBaseInstrInfo &TII, - MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const RegisterBankInfo &RBI) const { + MachineRegisterInfo &MRI) const { auto &MBB = *MIB->getParent(); auto InsertBefore = std::next(MIB->getIterator()); - auto &DebugLoc = MIB->getDebugLoc(); + auto &DbgLoc = MIB->getDebugLoc(); // Compare the condition to 0. auto CondReg = MIB->getOperand(1).getReg(); - assert(MRI.getType(CondReg).getSizeInBits() == 1 && - RBI.getRegBank(CondReg, MRI, TRI)->getID() == ARM::GPRRegBankID && + assert(validReg(MRI, CondReg, 1, ARM::GPRRegBankID) && "Unsupported types for select operation"); - auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPri)) + auto CmpI = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::CMPri)) .addUse(CondReg) .addImm(0) .add(predOps(ARMCC::AL)); @@ -376,13 +510,10 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, auto ResReg = MIB->getOperand(0).getReg(); auto TrueReg = MIB->getOperand(2).getReg(); auto FalseReg = MIB->getOperand(3).getReg(); - assert(MRI.getType(ResReg) == MRI.getType(TrueReg) && - MRI.getType(TrueReg) == MRI.getType(FalseReg) && - MRI.getType(FalseReg).getSizeInBits() == 32 && - RBI.getRegBank(TrueReg, MRI, TRI)->getID() == ARM::GPRRegBankID && - RBI.getRegBank(FalseReg, MRI, TRI)->getID() == ARM::GPRRegBankID && + assert(validOpRegPair(MRI, ResReg, TrueReg, 32, ARM::GPRRegBankID) && + validOpRegPair(MRI, TrueReg, FalseReg, 32, ARM::GPRRegBankID) && "Unsupported types for select operation"); - auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCr)) + auto Mov1I = BuildMI(MBB, InsertBefore, DbgLoc, TII.get(ARM::MOVCCr)) .addDef(ResReg) .addUse(TrueReg) .addUse(FalseReg) @@ -494,10 +625,32 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } - case G_ICMP: - return selectICmp(MIB, TII, MRI, TRI, RBI); case G_SELECT: - return selectSelect(MIB, TII, MRI, TRI, RBI); + return selectSelect(MIB, MRI); + case G_ICMP: { + CmpConstants Helper(ARM::CMPrr, ARM::INSTRUCTION_LIST_END, + ARM::GPRRegBankID, 32); + return selectCmp(Helper, MIB, MRI); + } + case G_FCMP: { + assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP"); + + unsigned OpReg = I.getOperand(2).getReg(); + unsigned Size = MRI.getType(OpReg).getSizeInBits(); + + if (Size == 64 && TII.getSubtarget().isFPOnlySP()) { + DEBUG(dbgs() << "Subtarget only supports single precision"); + return false; + } + if (Size != 32 && Size != 64) { + DEBUG(dbgs() << "Unsupported size for G_FCMP operand"); + return false; + } + + CmpConstants Helper(Size == 32 ? ARM::VCMPS : ARM::VCMPD, ARM::FMSTAT, + ARM::FPRRegBankID, Size); + return selectCmp(Helper, MIB, MRI); + } case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); @@ -510,11 +663,10 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { break; case G_CONSTANT: { unsigned Reg = I.getOperand(0).getReg(); - if (MRI.getType(Reg).getSizeInBits() != 32) + + if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID)) return false; - assert(RBI.getRegBank(Reg, MRI, TRI)->getID() == ARM::GPRRegBankID && - "Expected constant to live in a GPR"); I.setDesc(TII.get(ARM::MOVi)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index f3e62d09cc30a..f23e62595d2e9 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -28,6 +28,10 @@ using namespace llvm; #error "You shouldn't build this" #endif +static bool AEABI(const ARMSubtarget &ST) { + return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI(); +} + ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { using namespace TargetOpcode; @@ -66,8 +70,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { for (unsigned Op : {G_SREM, G_UREM}) if (ST.hasDivideInARMMode()) setAction({Op, s32}, Lower); - else if (ST.isTargetAEABI() || ST.isTargetGNUAEABI() || - ST.isTargetMuslAEABI()) + else if (AEABI(ST)) setAction({Op, s32}, Custom); else setAction({Op, s32}, Libcall); @@ -86,6 +89,8 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_SELECT, 1, s1}, Legal); setAction({G_CONSTANT, s32}, Legal); + for (auto Ty : {s1, s8, s16}) + setAction({G_CONSTANT, Ty}, WidenScalar); setAction({G_ICMP, s1}, Legal); for (auto Ty : {s8, s16}) @@ -99,9 +104,22 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_LOAD, s64}, Legal); setAction({G_STORE, s64}, Legal); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Legal); + setAction({G_FCMP, 1, s64}, Legal); } else { for (auto Ty : {s32, s64}) setAction({G_FADD, Ty}, Libcall); + + setAction({G_FCMP, s1}, Legal); + setAction({G_FCMP, 1, s32}, Custom); + setAction({G_FCMP, 1, s64}, Custom); + + if (AEABI(ST)) + setFCmpLibcallsAEABI(); + else + setFCmpLibcallsGNU(); } for (unsigned Op : {G_FREM, G_FPOW}) @@ -111,11 +129,120 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { computeTables(); } +void ARMLegalizerInfo::setFCmpLibcallsAEABI() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. + FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F32, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F32, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F32, CmpInst::BAD_ICMP_PREDICATE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = { + {RTLIB::OGE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = { + {RTLIB::OLE_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = { + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = { + {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = { + {RTLIB::OGT_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::OLT_F64, CmpInst::BAD_ICMP_PREDICATE}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = { + {RTLIB::OEQ_F64, CmpInst::BAD_ICMP_PREDICATE}, + {RTLIB::UO_F64, CmpInst::BAD_ICMP_PREDICATE}}; +} + +void ARMLegalizerInfo::setFCmpLibcallsGNU() { + // FCMP_TRUE and FCMP_FALSE don't need libcalls, they should be + // default-initialized. + FCmp32Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp32Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F32, CmpInst::ICMP_EQ}}; + FCmp32Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F32, CmpInst::ICMP_SGE}}; + FCmp32Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F32, CmpInst::ICMP_SGT}}; + FCmp32Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SLE}}; + FCmp32Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F32, CmpInst::ICMP_NE}}; + FCmp32Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F32, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F32, CmpInst::ICMP_SLT}}; + FCmp32Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F32, CmpInst::ICMP_EQ}, + {RTLIB::UO_F32, CmpInst::ICMP_NE}}; + + FCmp64Libcalls.resize(CmpInst::LAST_FCMP_PREDICATE + 1); + FCmp64Libcalls[CmpInst::FCMP_OEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_OGE] = {{RTLIB::OGE_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_OGT] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_OLE] = {{RTLIB::OLE_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_OLT] = {{RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_ORD] = {{RTLIB::O_F64, CmpInst::ICMP_EQ}}; + FCmp64Libcalls[CmpInst::FCMP_UGE] = {{RTLIB::OLT_F64, CmpInst::ICMP_SGE}}; + FCmp64Libcalls[CmpInst::FCMP_UGT] = {{RTLIB::OLE_F64, CmpInst::ICMP_SGT}}; + FCmp64Libcalls[CmpInst::FCMP_ULE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SLE}}; + FCmp64Libcalls[CmpInst::FCMP_ULT] = {{RTLIB::OGE_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UNE] = {{RTLIB::UNE_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_UNO] = {{RTLIB::UO_F64, CmpInst::ICMP_NE}}; + FCmp64Libcalls[CmpInst::FCMP_ONE] = {{RTLIB::OGT_F64, CmpInst::ICMP_SGT}, + {RTLIB::OLT_F64, CmpInst::ICMP_SLT}}; + FCmp64Libcalls[CmpInst::FCMP_UEQ] = {{RTLIB::OEQ_F64, CmpInst::ICMP_EQ}, + {RTLIB::UO_F64, CmpInst::ICMP_NE}}; +} + +ARMLegalizerInfo::FCmpLibcallsList +ARMLegalizerInfo::getFCmpLibcalls(CmpInst::Predicate Predicate, + unsigned Size) const { + assert(CmpInst::isFPPredicate(Predicate) && "Unsupported FCmp predicate"); + if (Size == 32) + return FCmp32Libcalls[Predicate]; + if (Size == 64) + return FCmp64Libcalls[Predicate]; + llvm_unreachable("Unsupported size for FCmp predicate"); +} + bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const { using namespace TargetOpcode; + MIRBuilder.setInstr(MI); + switch (MI.getOpcode()) { default: return false; @@ -137,9 +264,9 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, auto RetVal = MRI.createGenericVirtualRegister( getLLTForType(*RetTy, MIRBuilder.getMF().getDataLayout())); - auto Status = replaceWithLibcall(MI, MIRBuilder, Libcall, {RetVal, RetTy}, - {{MI.getOperand(1).getReg(), ArgTy}, - {MI.getOperand(2).getReg(), ArgTy}}); + auto Status = createLibcall(MIRBuilder, Libcall, {RetVal, RetTy}, + {{MI.getOperand(1).getReg(), ArgTy}, + {MI.getOperand(2).getReg(), ArgTy}}); if (Status != LegalizerHelper::Legalized) return false; @@ -149,8 +276,76 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, MIRBuilder.buildUnmerge( {MRI.createGenericVirtualRegister(LLT::scalar(32)), OriginalResult}, RetVal); + break; + } + case G_FCMP: { + assert(MRI.getType(MI.getOperand(2).getReg()) == + MRI.getType(MI.getOperand(3).getReg()) && + "Mismatched operands for G_FCMP"); + auto OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits(); + + auto OriginalResult = MI.getOperand(0).getReg(); + auto Predicate = + static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + auto Libcalls = getFCmpLibcalls(Predicate, OpSize); + + if (Libcalls.empty()) { + assert((Predicate == CmpInst::FCMP_TRUE || + Predicate == CmpInst::FCMP_FALSE) && + "Predicate needs libcalls, but none specified"); + MIRBuilder.buildConstant(OriginalResult, + Predicate == CmpInst::FCMP_TRUE ? 1 : 0); + MI.eraseFromParent(); + return true; + } + + auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size"); + auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); + auto *RetTy = Type::getInt32Ty(Ctx); + + SmallVector<unsigned, 2> Results; + for (auto Libcall : Libcalls) { + auto LibcallResult = MRI.createGenericVirtualRegister(LLT::scalar(32)); + auto Status = + createLibcall(MIRBuilder, Libcall.LibcallID, {LibcallResult, RetTy}, + {{MI.getOperand(2).getReg(), ArgTy}, + {MI.getOperand(3).getReg(), ArgTy}}); + + if (Status != LegalizerHelper::Legalized) + return false; - return LegalizerHelper::Legalized; + auto ProcessedResult = + Libcalls.size() == 1 + ? OriginalResult + : MRI.createGenericVirtualRegister(MRI.getType(OriginalResult)); + + // We have a result, but we need to transform it into a proper 1-bit 0 or + // 1, taking into account the different peculiarities of the values + // returned by the comparison functions. + CmpInst::Predicate ResultPred = Libcall.Predicate; + if (ResultPred == CmpInst::BAD_ICMP_PREDICATE) { + // We have a nice 0 or 1, and we just need to truncate it back to 1 bit + // to keep the types consistent. + MIRBuilder.buildTrunc(ProcessedResult, LibcallResult); + } else { + // We need to compare against 0. + assert(CmpInst::isIntPredicate(ResultPred) && "Unsupported predicate"); + auto Zero = MRI.createGenericVirtualRegister(LLT::scalar(32)); + MIRBuilder.buildConstant(Zero, 0); + MIRBuilder.buildICmp(ResultPred, ProcessedResult, LibcallResult, Zero); + } + Results.push_back(ProcessedResult); + } + + if (Results.size() != 1) { + assert(Results.size() == 2 && "Unexpected number of results"); + MIRBuilder.buildOr(OriginalResult, Results[0], Results[1]); + } + break; } } + + MI.eraseFromParent(); + return true; } diff --git a/lib/Target/ARM/ARMLegalizerInfo.h b/lib/Target/ARM/ARMLegalizerInfo.h index a9bdd367737e5..78ab9412c04ba 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.h +++ b/lib/Target/ARM/ARMLegalizerInfo.h @@ -14,7 +14,10 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H #define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H +#include "llvm/ADT/IndexedMap.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/IR/Instructions.h" namespace llvm { @@ -27,6 +30,36 @@ public: bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const override; + +private: + void setFCmpLibcallsGNU(); + void setFCmpLibcallsAEABI(); + + struct FCmpLibcallInfo { + // Which libcall this is. + RTLIB::Libcall LibcallID; + + // The predicate to be used when comparing the value returned by the + // function with a relevant constant (currently hard-coded to zero). This is + // necessary because often the libcall will return e.g. a value greater than + // 0 to represent 'true' and anything negative to represent 'false', or + // maybe 0 to represent 'true' and non-zero for 'false'. If no comparison is + // needed, this should be CmpInst::BAD_ICMP_PREDICATE. + CmpInst::Predicate Predicate; + }; + using FCmpLibcallsList = SmallVector<FCmpLibcallInfo, 2>; + + // Map from each FCmp predicate to the corresponding libcall infos. A FCmp + // instruction may be lowered to one or two libcalls, which is why we need a + // list. If two libcalls are needed, their results will be OR'ed. + using FCmpLibcallsMapTy = IndexedMap<FCmpLibcallsList>; + + FCmpLibcallsMapTy FCmp32Libcalls; + FCmpLibcallsMapTy FCmp64Libcalls; + + // Get the libcall(s) corresponding to \p Predicate for operands of \p Size + // bits. + FCmpLibcallsList getFCmpLibcalls(CmpInst::Predicate, unsigned Size) const; }; } // End llvm namespace. #endif diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index 11fb81a4f9fea..c0c09e8c15afd 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -212,8 +212,6 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { const MachineFunction &MF = *MI.getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - unsigned NumOperands = MI.getNumOperands(); const ValueMapping *OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; @@ -236,26 +234,31 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; case G_LOAD: - case G_STORE: + case G_STORE: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); OperandsMapping = Ty.getSizeInBits() == 64 ? getOperandsMapping({&ARM::ValueMappings[ARM::DPR3OpsIdx], &ARM::ValueMappings[ARM::GPR3OpsIdx]}) : &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; - case G_FADD: + } + case G_FADD: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) && "Unsupported size for G_FADD"); OperandsMapping = Ty.getSizeInBits() == 64 ? &ARM::ValueMappings[ARM::DPR3OpsIdx] : &ARM::ValueMappings[ARM::SPR3OpsIdx]; break; + } case G_CONSTANT: case G_FRAME_INDEX: OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; case G_SELECT: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(1).getReg()); (void)Ty2; assert(Ty.getSizeInBits() == 32 && "Unsupported size for G_SELECT"); @@ -277,9 +280,29 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { &ARM::ValueMappings[ARM::GPR3OpsIdx]}); break; } + case G_FCMP: { + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); + LLT Ty1 = MRI.getType(MI.getOperand(2).getReg()); + LLT Ty2 = MRI.getType(MI.getOperand(3).getReg()); + (void)Ty2; + assert(Ty.getSizeInBits() == 1 && "Unsupported size for G_FCMP"); + assert(Ty1.getSizeInBits() == Ty2.getSizeInBits() && + "Mismatched operand sizes for G_FCMP"); + + unsigned Size = Ty1.getSizeInBits(); + assert((Size == 32 || Size == 64) && "Unsupported size for G_FCMP"); + + auto FPRValueMapping = Size == 32 ? &ARM::ValueMappings[ARM::SPR3OpsIdx] + : &ARM::ValueMappings[ARM::DPR3OpsIdx]; + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + FPRValueMapping, FPRValueMapping}); + break; + } case G_MERGE_VALUES: { // We only support G_MERGE_VALUES for creating a double precision floating // point value out of two GPRs. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 64 || Ty1.getSizeInBits() != 32 || @@ -294,6 +317,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_UNMERGE_VALUES: { // We only support G_UNMERGE_VALUES for splitting a double precision // floating point value into two GPRs. + LLT Ty = MRI.getType(MI.getOperand(0).getReg()); LLT Ty1 = MRI.getType(MI.getOperand(1).getReg()); LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); if (Ty.getSizeInBits() != 32 || Ty1.getSizeInBits() != 32 || diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 8eb9dbf5f9de6..51b0fedd2b54f 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -15,6 +15,24 @@ using namespace llvm; #define DEBUG_TYPE "armtti" +bool ARMTTIImpl::areInlineCompatible(const Function *Caller, + const Function *Callee) const { + const TargetMachine &TM = getTLI()->getTargetMachine(); + const FeatureBitset &CallerBits = + TM.getSubtargetImpl(*Caller)->getFeatureBits(); + const FeatureBitset &CalleeBits = + TM.getSubtargetImpl(*Callee)->getFeatureBits(); + + // To inline a callee, all features not in the whitelist must match exactly. + bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) == + (CalleeBits & ~InlineFeatureWhitelist); + // For features in the whitelist, the callee's features must be a subset of + // the callers'. + bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) == + (CalleeBits & InlineFeatureWhitelist); + return MatchExact && MatchSubset; +} + int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { assert(Ty->isIntegerTy()); diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 8a1a378638779..0695a4e633467 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -33,6 +33,39 @@ class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { const ARMSubtarget *ST; const ARMTargetLowering *TLI; + // Currently the following features are excluded from InlineFeatureWhitelist. + // ModeThumb, FeatureNoARM, ModeSoftFloat, FeatureVFPOnlySP, FeatureD16 + // Depending on whether they are set or unset, different + // instructions/registers are available. For example, inlining a callee with + // -thumb-mode in a caller with +thumb-mode, may cause the assembler to + // fail if the callee uses ARM only instructions, e.g. in inline asm. + const FeatureBitset InlineFeatureWhitelist = { + ARM::FeatureVFP2, ARM::FeatureVFP3, ARM::FeatureNEON, ARM::FeatureThumb2, + ARM::FeatureFP16, ARM::FeatureVFP4, ARM::FeatureFPARMv8, + ARM::FeatureFullFP16, ARM::FeatureHWDivThumb, + ARM::FeatureHWDivARM, ARM::FeatureDB, ARM::FeatureV7Clrex, + ARM::FeatureAcquireRelease, ARM::FeatureSlowFPBrcc, + ARM::FeaturePerfMon, ARM::FeatureTrustZone, ARM::Feature8MSecExt, + ARM::FeatureCrypto, ARM::FeatureCRC, ARM::FeatureRAS, + ARM::FeatureFPAO, ARM::FeatureFuseAES, ARM::FeatureZCZeroing, + ARM::FeatureProfUnpredicate, ARM::FeatureSlowVGETLNi32, + ARM::FeatureSlowVDUP32, ARM::FeaturePreferVMOVSR, + ARM::FeaturePrefISHSTBarrier, ARM::FeatureMuxedUnits, + ARM::FeatureSlowOddRegister, ARM::FeatureSlowLoadDSubreg, + ARM::FeatureDontWidenVMOVS, ARM::FeatureExpandMLx, + ARM::FeatureHasVMLxHazards, ARM::FeatureNEONForFPMovs, + ARM::FeatureNEONForFP, ARM::FeatureCheckVLDnAlign, + ARM::FeatureHasSlowFPVMLx, ARM::FeatureVMLxForwarding, + ARM::FeaturePref32BitThumb, ARM::FeatureAvoidPartialCPSR, + ARM::FeatureCheapPredicableCPSR, ARM::FeatureAvoidMOVsShOp, + ARM::FeatureHasRetAddrStack, ARM::FeatureHasNoBranchPredictor, + ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization, + ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass, + ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, + ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, + ARM::FeatureNoNegativeImmediates + }; + const ARMSubtarget *getST() const { return ST; } const ARMTargetLowering *getTLI() const { return TLI; } @@ -41,6 +74,9 @@ public: : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} + bool areInlineCompatible(const Function *Caller, + const Function *Callee) const; + bool enableInterleavedAccessVectorization() { return true; } /// Floating-point computation using ARMv8 AArch32 Advanced diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 891b5c60e1fd6..1129826f21f64 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5249,6 +5249,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // Fall though for the Identifier case that is not a register or a // special name. + LLVM_FALLTHROUGH; } case AsmToken::LParen: // parenthesized expressions like (_strcmp-4) case AsmToken::Integer: // things like 1f and 2b as a branch targets @@ -8992,6 +8993,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, return PlainMatchResult; } +std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS); + static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, @@ -9085,9 +9088,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(ErrorLoc, "invalid operand for instruction"); } - case Match_MnemonicFail: - return Error(IDLoc, "invalid instruction", + case Match_MnemonicFail: { + uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); + std::string Suggestion = ARMMnemonicSpellCheck( + ((ARMOperand &)*Operands[0]).getToken(), FBS); + return Error(IDLoc, "invalid instruction" + Suggestion, ((ARMOperand &)*Operands[0]).getLocRange()); + } case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 22de728fe06e1..a77df7a2598f4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -361,9 +361,8 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, uint64_t Value, - bool IsPCRel, MCContext &Ctx, - bool IsLittleEndian, - bool IsResolved) const { + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const { unsigned Kind = Fixup.getKind(); // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT @@ -392,7 +391,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, case FK_SecRel_4: return Value; case ARM::fixup_arm_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_arm_movw_lo16: { @@ -404,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, return Value; } case ARM::fixup_t2_movt_hi16: - if (!IsPCRel) + if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF()) Value >>= 16; LLVM_FALLTHROUGH; case ARM::fixup_t2_movw_lo16: { @@ -885,11 +884,11 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, uint64_t Value, - bool IsPCRel) const { + bool IsResolved) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); MCContext &Ctx = Asm.getContext(); - Value = adjustFixupValue(Asm, Fixup, Target, Value, IsPCRel, Ctx, - IsLittleEndian, true); + Value = adjustFixupValue(Asm, Fixup, Target, Value, IsResolved, Ctx, + IsLittleEndian); if (!Value) return; // Doesn't change encoding. diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 84b54bbb9a49b..02374966dafe7 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -42,13 +42,13 @@ public: const MCValue &Target) override; unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, - const MCValue &Target, uint64_t Value, bool IsPCRel, - MCContext &Ctx, bool IsLittleEndian, - bool IsResolved) const; + const MCValue &Target, uint64_t Value, + bool IsResolved, MCContext &Ctx, + bool IsLittleEndian) const; void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, const MCValue &Target, MutableArrayRef<char> Data, - uint64_t Value, bool IsPCRel) const override; + uint64_t Value, bool IsResolved) const override; unsigned getRelaxedOpcode(unsigned Op) const; diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 0b6574c37de12..5709b4e617987 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -236,7 +236,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, case ARM::R12: if (STI.splitFramePushPop(MF)) break; - // fallthough + LLVM_FALLTHROUGH; case ARM::R0: case ARM::R1: case ARM::R2: |