diff options
Diffstat (limited to 'lib/Target/ARM')
-rw-r--r-- | lib/Target/ARM/ARM.h | 1 | ||||
-rw-r--r-- | lib/Target/ARM/ARM.td | 4 | ||||
-rw-r--r-- | lib/Target/ARM/ARMFastISel.cpp | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 134 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelLowering.h | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 9 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 4 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstructionSelector.cpp | 58 | ||||
-rw-r--r-- | lib/Target/ARM/ARMLegalizerInfo.cpp | 7 | ||||
-rw-r--r-- | lib/Target/ARM/ARMRegisterBankInfo.cpp | 20 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSubtarget.cpp | 5 | ||||
-rw-r--r-- | lib/Target/ARM/ARMSubtarget.h | 9 | ||||
-rw-r--r-- | lib/Target/ARM/ARMTargetMachine.cpp | 8 | ||||
-rw-r--r-- | lib/Target/ARM/ARMTargetMachine.h | 3 | ||||
-rw-r--r-- | lib/Target/ARM/ARMTargetTransformInfo.cpp | 19 | ||||
-rw-r--r-- | lib/Target/ARM/ARMTargetTransformInfo.h | 2 | ||||
-rw-r--r-- | lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 10 | ||||
-rw-r--r-- | lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 4 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb2SizeReduction.cpp | 8 |
19 files changed, 236 insertions, 73 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 3aac689c63104..9ffb4c2055f98 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -61,6 +61,7 @@ void initializeARMLoadStoreOptPass(PassRegistry &); void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); void initializeARMConstantIslandsPass(PassRegistry &); void initializeARMExpandPseudoPass(PassRegistry &); +void initializeThumb2SizeReducePass(PassRegistry &); } // end namespace llvm diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index c1a3f639461d1..c9766aa2161ab 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -83,6 +83,9 @@ def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", def FeatureV7Clrex : SubtargetFeature<"v7clrex", "HasV7Clrex", "true", "Has v7 clrex instruction">; +def FeatureDFB : SubtargetFeature<"dfb", "HasFullDataBarrier", "true", + "Has full data barrier (dfb) instruction">; + def FeatureAcquireRelease : SubtargetFeature<"acquire-release", "HasAcquireRelease", "true", "Has v8 acquire/release (lda/ldaex " @@ -617,6 +620,7 @@ def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps, def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, FeatureRClass, FeatureDB, + FeatureDFB, FeatureDSP, FeatureCRC, FeatureMP, diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 0ea435062ec03..60048d4453d89 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1416,7 +1416,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, case MVT::i8: case MVT::i16: needsExt = true; - // Intentional fall-through. + LLVM_FALLTHROUGH; case MVT::i32: if (isThumb2) { if (!UseImm) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 1b4d7ff508489..aeda7c06a27ac 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1041,7 +1041,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, if (!Subtarget->isThumb1Only()) setOperationAction(ISD::SETCCE, MVT::i32, Custom); - setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::BR_CC, MVT::i32, Custom); setOperationAction(ISD::BR_CC, MVT::f32, Custom); setOperationAction(ISD::BR_CC, MVT::f64, Custom); @@ -1084,20 +1084,11 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, } } - // Combine sin / cos into one node or libcall if possible. - if (Subtarget->hasSinCos()) { - setLibcallName(RTLIB::SINCOS_F32, "sincosf"); - setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->isTargetWatchABI()) { - setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP); - setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP); - } - if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) { - // For iOS, we don't want to the normal expansion of a libcall to - // sincos. We want to issue a libcall to __sincos_stret. - setOperationAction(ISD::FSINCOS, MVT::f64, Custom); - setOperationAction(ISD::FSINCOS, MVT::f32, Custom); - } + // Use __sincos_stret if available. + if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && + getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { + setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); } // FP-ARMv8 implements a lot of rounding-like FP operations. @@ -1255,6 +1246,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::CMOV: return "ARMISD::CMOV"; case ARMISD::SSAT: return "ARMISD::SSAT"; + case ARMISD::USAT: return "ARMISD::USAT"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; @@ -3902,6 +3894,10 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); } +// This function returns three things: the arithmetic computation itself +// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The +// comparison and the condition code define the case in which the arithmetic +// computation *does not* overflow. std::pair<SDValue, SDValue> ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const { @@ -3927,7 +3923,11 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, break; case ISD::UADDO: ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32); - Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS); + // We use ADDC here to correspond to its use in LowerUnsignedALUO. + // We do not use it in the USUBO case as Value may not be used. + Value = DAG.getNode(ARMISD::ADDC, dl, + DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS) + .getValue(0); OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS); break; case ISD::SSUBO: @@ -4205,7 +4205,7 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))); } -// Check if two chained conditionals could be converted into SSAT. +// Check if two chained conditionals could be converted into SSAT or USAT. // // SSAT can replace a set of two conditional selectors that bound a number to an // interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples: @@ -4216,10 +4216,14 @@ static bool isUpperSaturate(const SDValue LHS, const SDValue RHS, // x < k ? (x < -k ? -k : x) : k // etc. // +// USAT works similarily to SSAT but bounds on the interval [0, k] where k + 1 is +// a power of 2. +// // It returns true if the conversion can be done, false otherwise. -// Additionally, the variable is returned in parameter V and the constant in K. +// Additionally, the variable is returned in parameter V, the constant in K and +// usat is set to true if the conditional represents an unsigned saturation static bool isSaturatingConditional(const SDValue &Op, SDValue &V, - uint64_t &K) { + uint64_t &K, bool &usat) { SDValue LHS1 = Op.getOperand(0); SDValue RHS1 = Op.getOperand(1); SDValue TrueVal1 = Op.getOperand(2); @@ -4286,13 +4290,23 @@ static bool isSaturatingConditional(const SDValue &Op, SDValue &V, int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue(); int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue(); int64_t PosVal = std::max(Val1, Val2); + int64_t NegVal = std::min(Val1, Val2); if (((Val1 > Val2 && UpperCheckOp == &Op) || (Val1 < Val2 && UpperCheckOp == &Op2)) && - Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) { + isPowerOf2_64(PosVal + 1)) { + + // Handle the difference between USAT (unsigned) and SSAT (signed) saturation + if (Val1 == ~Val2) + usat = false; + else if (NegVal == 0) + usat = true; + else + return false; V = V2; K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive + return true; } @@ -4306,10 +4320,16 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // Try to convert two saturating conditional selects into a single SSAT SDValue SatValue; uint64_t SatConstant; + bool SatUSat; if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) && - isSaturatingConditional(Op, SatValue, SatConstant)) - return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, - DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + isSaturatingConditional(Op, SatValue, SatConstant, SatUSat)) { + if (SatUSat) + return DAG.getNode(ARMISD::USAT, dl, VT, SatValue, + DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + else + return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue, + DAG.getConstant(countTrailingOnes(SatConstant), dl, VT)); + } SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -4506,6 +4526,39 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { return SDValue(); } +SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Cond = Op.getOperand(1); + SDValue Dest = Op.getOperand(2); + SDLoc dl(Op); + + // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction. + unsigned Opc = Cond.getOpcode(); + if (Cond.getResNo() == 1 && (Opc == ISD::SADDO || Opc == ISD::UADDO || + Opc == ISD::SSUBO || Opc == ISD::USUBO)) { + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0))) + return SDValue(); + + // The actual operation with overflow check. + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc); + + // Reverse the condition code. + ARMCC::CondCodes CondCode = + (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); + CondCode = ARMCC::getOppositeCondition(CondCode); + ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, + OverflowCmp); + } + + return SDValue(); +} + SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); @@ -4526,6 +4579,33 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { } } + // Optimize {s|u}{add|sub}.with.overflow feeding into a branch instruction. + unsigned Opc = LHS.getOpcode(); + if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) && + (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || + Opc == ISD::USUBO) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + // Only lower legal XALUO ops. + if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0))) + return SDValue(); + + // The actual operation with overflow check. + SDValue Value, OverflowCmp; + SDValue ARMcc; + std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc); + + if ((CC == ISD::SETNE) != isOneConstant(RHS)) { + // Reverse the condition code. + ARMCC::CondCodes CondCode = + (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue(); + CondCode = ARMCC::getOppositeCondition(CondCode); + ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32); + } + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + + return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR, + OverflowCmp); + } + if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); @@ -7523,10 +7603,9 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { Entry.IsZExt = false; Args.push_back(Entry); - const char *LibcallName = - (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret"; RTLIB::Libcall LC = - (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32; + (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32; + const char *LibcallName = getLibcallName(LC); CallingConv::ID CC = getLibcallCallingConv(LC); SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL)); @@ -7782,6 +7861,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); @@ -13751,7 +13831,7 @@ Instruction *ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, case AtomicOrdering::SequentiallyConsistent: if (!Inst->hasAtomicStore()) return nullptr; // Nothing to do - /*FALLTHROUGH*/ + LLVM_FALLTHROUGH; case AtomicOrdering::Release: case AtomicOrdering::AcquireRelease: if (Subtarget->preferISHSTBarriers()) diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 0a1af8d89f9bf..bf63dfae4407e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -87,6 +87,7 @@ class VectorType; CMOV, // ARM conditional move instructions. SSAT, // Signed saturation + USAT, // Unsigned saturation BCC_i64, @@ -643,6 +644,7 @@ class VectorType; SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 4e13af5963008..eb8526bfeadfb 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -139,6 +139,8 @@ def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, def ARMssatnoshift : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>; +def ARMusatnoshift : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>; + def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; @@ -278,6 +280,9 @@ def HasDSP : Predicate<"Subtarget->hasDSP()">, def HasDB : Predicate<"Subtarget->hasDataBarrier()">, AssemblerPredicate<"FeatureDB", "data-barriers">; +def HasDFB : Predicate<"Subtarget->hasFullDataBarrier()">, + AssemblerPredicate<"FeatureDFB", + "full-data-barrier">; def HasV7Clrex : Predicate<"Subtarget->hasV7Clrex()">, AssemblerPredicate<"FeatureV7Clrex", "v7 clrex">; @@ -3832,6 +3837,8 @@ def : ARMV6Pat<(int_arm_usat GPRnopc:$a, imm0_31:$pos), (USAT imm0_31:$pos, GPRnopc:$a, 0)>; def : ARMPat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : ARMPat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm), + (USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; def : ARMV6Pat<(int_arm_ssat16 GPRnopc:$a, imm1_16:$pos), (SSAT16 imm1_16:$pos, GPRnopc:$a)>; def : ARMV6Pat<(int_arm_usat16 GPRnopc:$a, imm0_15:$pos), @@ -5846,6 +5853,8 @@ include "ARMInstrNEON.td" def : InstAlias<"dmb", (DMB 0xf), 0>, Requires<[IsARM, HasDB]>; def : InstAlias<"dsb", (DSB 0xf), 0>, Requires<[IsARM, HasDB]>; def : InstAlias<"isb", (ISB 0xf), 0>, Requires<[IsARM, HasDB]>; +// Armv8-R 'Data Full Barrier' +def : InstAlias<"dfb", (DSB 0xc), 1>, Requires<[IsARM, HasDFB]>; // System instructions def : MnemonicAlias<"swi", "svc">; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 670ed127da7e9..4592249f57952 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2336,6 +2336,8 @@ def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), def : T2Pat<(ARMssatnoshift GPRnopc:$Rn, imm0_31:$imm), (t2SSAT imm0_31:$imm, GPRnopc:$Rn, 0)>; +def : T2Pat<(ARMusatnoshift GPRnopc:$Rn, imm0_31:$imm), + (t2USAT imm0_31:$imm, GPRnopc:$Rn, 0)>; def : T2Pat<(int_arm_ssat GPR:$a, imm1_32:$pos), (t2SSAT imm1_32:$pos, GPR:$a, 0)>; def : T2Pat<(int_arm_usat GPR:$a, imm0_31:$pos), @@ -4506,6 +4508,8 @@ def : t2InstAlias<"tst${p} $Rn, $Rm", def : InstAlias<"dmb${p}", (t2DMB 0xf, pred:$p), 0>, Requires<[HasDB]>; def : InstAlias<"dsb${p}", (t2DSB 0xf, pred:$p), 0>, Requires<[HasDB]>; def : InstAlias<"isb${p}", (t2ISB 0xf, pred:$p), 0>, Requires<[HasDB]>; +// Armv8-R 'Data Full Barrier' +def : InstAlias<"dfb${p}", (t2DSB 0xc, pred:$p), 1>, Requires<[HasDFB]>; // Alias for LDR, LDRB, LDRH, LDRSB, and LDRSH without the ".w" optional // width specifier. diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 6bbeae2e11514..b0fd0b476920c 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -669,13 +669,22 @@ bool ARMInstructionSelector::select(MachineInstr &I, return true; } + using namespace TargetOpcode; + if (I.getOpcode() == G_CONSTANT) { + // Pointer constants should be treated the same as 32-bit integer constants. + // Change the type and let TableGen handle it. + unsigned ResultReg = I.getOperand(0).getReg(); + LLT Ty = MRI.getType(ResultReg); + if (Ty.isPointer()) + MRI.setType(ResultReg, LLT::scalar(32)); + } + if (selectImpl(I, CoverageInfo)) return true; MachineInstrBuilder MIB{MF, I}; bool isSExt = false; - using namespace TargetOpcode; switch (I.getOpcode()) { case G_SEXT: isSExt = true; @@ -741,6 +750,31 @@ bool ARMInstructionSelector::select(MachineInstr &I, const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + if (SrcRegBank.getID() == ARM::FPRRegBankID) { + // This should only happen in the obscure case where we have put a 64-bit + // integer into a D register. Get it out of there and keep only the + // interesting part. + assert(I.getOpcode() == G_TRUNC && "Unsupported operand for G_ANYEXT"); + assert(DstRegBank.getID() == ARM::GPRRegBankID && + "Unsupported combination of register banks"); + assert(MRI.getType(SrcReg).getSizeInBits() == 64 && "Unsupported size"); + assert(MRI.getType(DstReg).getSizeInBits() <= 32 && "Unsupported size"); + + unsigned IgnoredBits = MRI.createVirtualRegister(&ARM::GPRRegClass); + auto InsertBefore = std::next(I.getIterator()); + auto MovI = + BuildMI(MBB, InsertBefore, I.getDebugLoc(), TII.get(ARM::VMOVRRD)) + .addDef(DstReg) + .addDef(IgnoredBits) + .addUse(SrcReg) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI)) + return false; + + MIB->eraseFromParent(); + return true; + } + if (SrcRegBank.getID() != DstRegBank.getID()) { DEBUG(dbgs() << "G_TRUNC/G_ANYEXT operands on different register banks\n"); return false; @@ -754,6 +788,28 @@ bool ARMInstructionSelector::select(MachineInstr &I, I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } + case G_INTTOPTR: + case G_PTRTOINT: { + auto SrcReg = I.getOperand(1).getReg(); + auto DstReg = I.getOperand(0).getReg(); + + const auto &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); + const auto &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); + + if (SrcRegBank.getID() != DstRegBank.getID()) { + DEBUG(dbgs() + << "G_INTTOPTR/G_PTRTOINT operands on different register banks\n"); + return false; + } + + if (SrcRegBank.getID() != ARM::GPRRegBankID) { + DEBUG(dbgs() << "G_INTTOPTR/G_PTRTOINT on non-GPR not supported yet\n"); + return false; + } + + I.setDesc(TII.get(COPY)); + return selectCopy(I, TII, MRI, TRI, RBI); + } case G_SELECT: return selectSelect(MIB, MRI); case G_ICMP: { diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 2dd1dff64e878..8cff1f0869d0d 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -126,6 +126,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, s32}, Legal); } + setAction({G_INTTOPTR, p0}, Legal); + setAction({G_INTTOPTR, 1, s32}, Legal); + + setAction({G_PTRTOINT, s32}, Legal); + setAction({G_PTRTOINT, 1, p0}, Legal); + for (unsigned Op : {G_ASHR, G_LSHR, G_SHL}) setAction({Op, s32}, Legal); @@ -139,6 +145,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_BRCOND, s1}, Legal); setAction({G_CONSTANT, s32}, Legal); + setAction({G_CONSTANT, p0}, Legal); setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, widen_1_8_16); setAction({G_ICMP, s1}, Legal); diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index b32bfd449544f..fad0e98285e6f 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -226,12 +226,30 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_SEXT: case G_ZEXT: case G_ANYEXT: - case G_TRUNC: case G_GEP: + case G_INTTOPTR: + case G_PTRTOINT: // FIXME: We're abusing the fact that everything lives in a GPR for now; in // the real world we would use different mappings. OperandsMapping = &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; + case G_TRUNC: { + // In some cases we may end up with a G_TRUNC from a 64-bit value to a + // 32-bit value. This isn't a real floating point trunc (that would be a + // G_FPTRUNC). Instead it is an integer trunc in disguise, which can appear + // because the legalizer doesn't distinguish between integer and floating + // point values so it may leave some 64-bit integers un-narrowed. Until we + // have a more principled solution that doesn't let such things sneak all + // the way to this point, just map the source to a DPR and the destination + // to a GPR. + LLT LargeTy = MRI.getType(MI.getOperand(1).getReg()); + OperandsMapping = + LargeTy.getSizeInBits() <= 32 + ? &ARM::ValueMappings[ARM::GPR3OpsIdx] + : getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::DPR3OpsIdx]}); + break; + } case G_LOAD: case G_STORE: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 4d4a88126ce65..23027e92481f1 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -348,11 +348,6 @@ unsigned ARMSubtarget::getMispredictionPenalty() const { return SchedModel.MispredictPenalty; } -bool ARMSubtarget::hasSinCos() const { - return isTargetWatchOS() || - (isTargetIOS() && !getTargetTriple().isOSVersionLT(7, 0)); -} - bool ARMSubtarget::enableMachineScheduler() const { // Enable the MachineScheduler before register allocation for subtargets // with the use-misched feature. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 9301197e13877..eedb675a3304e 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -236,6 +236,10 @@ protected: /// instructions. bool HasDataBarrier = false; + /// HasFullDataBarrier - True if the subtarget supports DFB data barrier + /// instruction. + bool HasFullDataBarrier = false; + /// HasV7Clrex - True if the subtarget supports CLREX instructions bool HasV7Clrex = false; @@ -544,6 +548,7 @@ public: bool hasDivideInThumbMode() const { return HasHardwareDivideInThumb; } bool hasDivideInARMMode() const { return HasHardwareDivideInARM; } bool hasDataBarrier() const { return HasDataBarrier; } + bool hasFullDataBarrier() const { return HasFullDataBarrier; } bool hasV7Clrex() const { return HasV7Clrex; } bool hasAcquireRelease() const { return HasAcquireRelease; } @@ -712,10 +717,6 @@ public: unsigned getMispredictionPenalty() const; - /// This function returns true if the target has sincos() routine in its - /// compiler runtime or math libraries. - bool hasSinCos() const; - /// Returns true if machine scheduler should be enabled. bool enableMachineScheduler() const override; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 51982b2dab14c..0f6d1eddc9857 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -92,6 +92,7 @@ extern "C" void LLVMInitializeARMTarget() { initializeARMConstantIslandsPass(Registry); initializeARMExecutionDepsFixPass(Registry); initializeARMExpandPseudoPass(Registry); + initializeThumb2SizeReducePass(Registry); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -282,10 +283,9 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { return I.get(); } -TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { - return TargetIRAnalysis([this](const Function &F) { - return TargetTransformInfo(ARMTTIImpl(this, F)); - }); +TargetTransformInfo +ARMBaseTargetMachine::getTargetTransformInfo(const Function &F) { + return TargetTransformInfo(ARMTTIImpl(this, F)); } ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 655ec3202bfbd..2072bb731f0a0 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -53,8 +53,7 @@ public: const ARMSubtarget *getSubtargetImpl() const = delete; bool isLittleEndian() const { return isLittle; } - /// \brief Get the TargetIRAnalysis for this target. - TargetIRAnalysis getTargetIRAnalysis() override; + TargetTransformInfo getTargetTransformInfo(const Function &F) override; // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index cae01e415eff1..43d7888075b56 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -394,25 +394,6 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, return 1; } -int ARMTTIImpl::getFPOpCost(Type *Ty) { - // Use similar logic that's in ARMISelLowering: - // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access - // to VFP. - - if (ST->hasVFP2() && !ST->isThumb1Only()) { - if (Ty->isFloatTy()) { - return TargetTransformInfo::TCC_Basic; - } - - if (Ty->isDoubleTy()) { - return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive : - TargetTransformInfo::TCC_Basic; - } - } - - return TargetTransformInfo::TCC_Expensive; -} - int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { // We only handle costs of reverse and alternate shuffles for now. diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 99353a3219a0a..cd9fa07090207 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -156,8 +156,6 @@ public: int getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr); - int getFPOpCost(Type *Ty); - int getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 26fda5f22b4f2..97b642c99f800 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -5581,11 +5581,11 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, CanAcceptPredicationCode = Mnemonic != "cdp2" && Mnemonic != "clrex" && Mnemonic != "mcr2" && Mnemonic != "mcrr2" && Mnemonic != "mrc2" && Mnemonic != "mrrc2" && - Mnemonic != "dmb" && Mnemonic != "dsb" && Mnemonic != "isb" && - Mnemonic != "pld" && Mnemonic != "pli" && Mnemonic != "pldw" && - Mnemonic != "ldc2" && Mnemonic != "ldc2l" && Mnemonic != "stc2" && - Mnemonic != "stc2l" && !Mnemonic.startswith("rfe") && - !Mnemonic.startswith("srs"); + Mnemonic != "dmb" && Mnemonic != "dfb" && Mnemonic != "dsb" && + Mnemonic != "isb" && Mnemonic != "pld" && Mnemonic != "pli" && + Mnemonic != "pldw" && Mnemonic != "ldc2" && Mnemonic != "ldc2l" && + Mnemonic != "stc2" && Mnemonic != "stc2l" && + !Mnemonic.startswith("rfe") && !Mnemonic.startswith("srs"); } else if (isThumbOne()) { if (hasV6MOps()) CanAcceptPredicationCode = Mnemonic != "movs"; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index a29a2eeccfe87..53c6358776753 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2386,6 +2386,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn, case ARM::VLD4q32_UPD: if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+2)%32, Address, Decoder))) return MCDisassembler::Fail; + break; default: break; } @@ -3326,6 +3327,7 @@ static DecodeStatus DecodeT2AddrModeSOReg(MCInst &Inst, unsigned Val, case ARM::t2STRs: if (Rn == 15) return MCDisassembler::Fail; + break; default: break; } @@ -3391,6 +3393,7 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, break; case ARM::t2LDRSBs: Inst.setOpcode(ARM::t2PLIs); + break; default: break; } @@ -3854,6 +3857,7 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, case ARM::t2STRHi12: if (Rn == 15) return MCDisassembler::Fail; + break; default: break; } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 3920c73fba6aa..5357e26856ead 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -45,6 +45,7 @@ using namespace llvm; #define DEBUG_TYPE "t2-reduce-size" +#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass" STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones"); STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones"); @@ -162,7 +163,7 @@ namespace { const Thumb2InstrInfo *TII; const ARMSubtarget *STI; - Thumb2SizeReduce(std::function<bool(const Function &)> Ftor); + Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr); bool runOnMachineFunction(MachineFunction &MF) override; @@ -172,7 +173,7 @@ namespace { } StringRef getPassName() const override { - return "Thumb2 instruction size reduction pass"; + return THUMB2_SIZE_REDUCE_NAME; } private: @@ -237,6 +238,9 @@ namespace { } // end anonymous namespace +INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false, + false) + Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor) : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) { OptimizeSize = MinimizeSize = false; |