diff options
Diffstat (limited to 'lib/Target/ARM')
| -rw-r--r-- | lib/Target/ARM/ARM.td | 3 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMAsmPrinter.cpp | 3 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMBaseInstrInfo.cpp | 8 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMISelLowering.cpp | 54 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMISelLowering.h | 2 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 10 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMInstructionSelector.cpp | 105 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMLegalizerInfo.cpp | 6 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMMacroFusion.cpp | 57 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMMacroFusion.h | 24 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMRegisterBankInfo.cpp | 10 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMSubtarget.h | 8 | ||||
| -rw-r--r-- | lib/Target/ARM/ARMTargetMachine.cpp | 7 | ||||
| -rw-r--r-- | lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 66 | ||||
| -rw-r--r-- | lib/Target/ARM/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 63 | ||||
| -rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h | 14 | ||||
| -rw-r--r-- | lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 7 |
18 files changed, 357 insertions, 91 deletions
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index ca68f5d42c32c..6f67183df6a18 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -100,7 +100,8 @@ def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", "Enable Reliability, Availability and Serviceability extensions">; def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", "Enable fast computation of positive address offsets">; - +def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; // Cyclone has preferred instructions for zeroing VFP registers, which can // execute in 0 cycles. diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index f9da036c7e468..90f635c812542 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1504,6 +1504,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case ARM::CONSTPOOL_ENTRY: { + if (Subtarget->genExecuteOnly()) + llvm_unreachable("execute-only should not generate constant pools"); + /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool /// in the function. The first operand is the ID# for this instruction, the /// second is the index into the MachineConstantPool that this is, the third diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8715657ad5e25..e0810c358f2da 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -665,12 +665,14 @@ bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const { const ARMFunctionInfo *AFI = MI.getParent()->getParent()->getInfo<ARMFunctionInfo>(); + // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM. + // In their ARM encoding, they can't be encoded in a conditional form. + if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + return false; + if (AFI->isThumb2Function()) { if (getSubtarget().restrictIT()) return isV8EligibleForIT(&MI); - } else { // non-Thumb - if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) - return false; } return true; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 5b2d093e8f0da..2bcc707e9fc3c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2669,12 +2669,35 @@ static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) { // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only // be used to form addressing mode. These wrapped nodes will be selected // into MOVi. -static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, + SelectionDAG &DAG) const { EVT PtrVT = Op.getValueType(); // FIXME there is no actual debug info here SDLoc dl(Op); ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); SDValue Res; + + // When generating execute-only code Constant Pools must be promoted to the + // global data section. It's a bit ugly that we can't share them across basic + // blocks, but this way we guarantee that execute-only behaves correct with + // position-independent addressing modes. + if (Subtarget->genExecuteOnly()) { + auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); + auto T = const_cast<Type*>(CP->getType()); + auto C = const_cast<Constant*>(CP->getConstVal()); + auto M = const_cast<Module*>(DAG.getMachineFunction(). + getFunction()->getParent()); + auto GV = new GlobalVariable( + *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C, + Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + + Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" + + Twine(AFI->createPICLabelUId()) + ); + SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV), + dl, PtrVT); + return LowerGlobalAddress(GA, DAG); + } + if (CP->isMachineConstantPoolEntry()) Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlignment()); @@ -3118,6 +3141,19 @@ static bool isReadOnly(const GlobalValue *GV) { isa<Function>(GV); } +SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op, + SelectionDAG &DAG) const { + switch (Subtarget->getTargetTriple().getObjectFormat()) { + default: llvm_unreachable("unknown object format"); + case Triple::COFF: + return LowerGlobalAddressWindows(Op, DAG); + case Triple::ELF: + return LowerGlobalAddressELF(Op, DAG); + case Triple::MachO: + return LowerGlobalAddressDarwin(Op, DAG); + } +} + SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(DAG.getDataLayout()); @@ -7634,21 +7670,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); - case ISD::ConstantPool: - if (Subtarget->genExecuteOnly()) - llvm_unreachable("execute-only should not generate constant pools"); - return LowerConstantPool(Op, DAG); + case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); - case ISD::GlobalAddress: - switch (Subtarget->getTargetTriple().getObjectFormat()) { - default: llvm_unreachable("unknown object format"); - case Triple::COFF: - return LowerGlobalAddressWindows(Op, DAG); - case Triple::ELF: - return LowerGlobalAddressELF(Op, DAG); - case Triple::MachO: - return LowerGlobalAddressDarwin(Op, DAG); - } + case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 26da528c19e6d..5044134f5b1e2 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -601,6 +601,8 @@ class InstrItineraryData; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const; SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 45471a4e95b39..53db5acbe805c 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -4756,6 +4756,16 @@ def t2MOVsr: t2AsmPseudo<"mov${p} $Rd, $shift", def t2MOVSsr: t2AsmPseudo<"movs${p} $Rd, $shift", (ins rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +// Aliases for the above with the .w qualifier +def : t2InstAlias<"mov${p}.w $Rd, $shift", + (t2MOVsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def : t2InstAlias<"movs${p}.w $Rd, $shift", + (t2MOVSsi rGPR:$Rd, t2_so_reg:$shift, pred:$p)>; +def : t2InstAlias<"mov${p}.w $Rd, $shift", + (t2MOVsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; +def : t2InstAlias<"movs${p}.w $Rd, $shift", + (t2MOVSsr rGPR:$Rd, so_reg_reg:$shift, pred:$p)>; + // ADR w/o the .w suffix def : t2InstAlias<"adr${p} $Rd, $addr", (t2ADR rGPR:$Rd, t2adrlabel:$addr, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index 2ae3bad4076b0..4cb0eca5ee5f8 100644 --- a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -42,6 +42,10 @@ public: private: bool selectImpl(MachineInstr &I) const; + bool selectICmp(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const; + const ARMBaseInstrInfo &TII; const ARMBaseRegisterInfo &TRI; const ARMBaseTargetMachine &TM; @@ -243,6 +247,105 @@ static unsigned selectLoadStoreOpCode(unsigned Opc, unsigned RegBank, return Opc; } +static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { + switch (Pred) { + // Needs two compares... + case CmpInst::FCMP_ONE: + case CmpInst::FCMP_UEQ: + default: + // AL is our "false" for now. The other two need more compares. + return ARMCC::AL; + case CmpInst::ICMP_EQ: + case CmpInst::FCMP_OEQ: + return ARMCC::EQ; + case CmpInst::ICMP_SGT: + case CmpInst::FCMP_OGT: + return ARMCC::GT; + case CmpInst::ICMP_SGE: + case CmpInst::FCMP_OGE: + return ARMCC::GE; + case CmpInst::ICMP_UGT: + case CmpInst::FCMP_UGT: + return ARMCC::HI; + case CmpInst::FCMP_OLT: + return ARMCC::MI; + case CmpInst::ICMP_ULE: + case CmpInst::FCMP_OLE: + return ARMCC::LS; + case CmpInst::FCMP_ORD: + return ARMCC::VC; + case CmpInst::FCMP_UNO: + return ARMCC::VS; + case CmpInst::FCMP_UGE: + return ARMCC::PL; + case CmpInst::ICMP_SLT: + case CmpInst::FCMP_ULT: + return ARMCC::LT; + case CmpInst::ICMP_SLE: + case CmpInst::FCMP_ULE: + return ARMCC::LE; + case CmpInst::FCMP_UNE: + case CmpInst::ICMP_NE: + return ARMCC::NE; + case CmpInst::ICMP_UGE: + return ARMCC::HS; + case CmpInst::ICMP_ULT: + return ARMCC::LO; + } +} + +bool ARMInstructionSelector::selectICmp(MachineInstrBuilder &MIB, + const ARMBaseInstrInfo &TII, + MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + const RegisterBankInfo &RBI) const { + auto &MBB = *MIB->getParent(); + auto InsertBefore = std::next(MIB->getIterator()); + auto &DebugLoc = MIB->getDebugLoc(); + + // Move 0 into the result register. + auto Mov0I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVi)) + .addDef(MRI.createVirtualRegister(&ARM::GPRRegClass)) + .addImm(0) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + if (!constrainSelectedInstRegOperands(*Mov0I, TII, TRI, RBI)) + return false; + + // Perform the comparison. + auto LHSReg = MIB->getOperand(2).getReg(); + auto RHSReg = MIB->getOperand(3).getReg(); + assert(MRI.getType(LHSReg) == MRI.getType(RHSReg) && + MRI.getType(LHSReg).getSizeInBits() == 32 && + MRI.getType(RHSReg).getSizeInBits() == 32 && + "Unsupported types for comparison operation"); + auto CmpI = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::CMPrr)) + .addUse(LHSReg) + .addUse(RHSReg) + .add(predOps(ARMCC::AL)); + if (!constrainSelectedInstRegOperands(*CmpI, TII, TRI, RBI)) + return false; + + // Move 1 into the result register if the flags say so. + auto ResReg = MIB->getOperand(0).getReg(); + auto Cond = + static_cast<CmpInst::Predicate>(MIB->getOperand(1).getPredicate()); + auto ARMCond = getComparePred(Cond); + if (ARMCond == ARMCC::AL) + return false; + + auto Mov1I = BuildMI(MBB, InsertBefore, DebugLoc, TII.get(ARM::MOVCCi)) + .addDef(ResReg) + .addUse(Mov0I->getOperand(0).getReg()) + .addImm(1) + .add(predOps(ARMCond, ARM::CPSR)); + if (!constrainSelectedInstRegOperands(*Mov1I, TII, TRI, RBI)) + return false; + + MIB->eraseFromParent(); + return true; +} + bool ARMInstructionSelector::select(MachineInstr &I) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -343,6 +446,8 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { I.setDesc(TII.get(COPY)); return selectCopy(I, TII, MRI, TRI, RBI); } + case G_ICMP: + return selectICmp(MIB, TII, MRI, TRI, RBI); case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index a706079d98662..5873c7fb38729 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -86,6 +86,12 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_CONSTANT, s32}, Legal); + setAction({G_ICMP, s1}, Legal); + for (auto Ty : {s8, s16}) + setAction({G_ICMP, 1, Ty}, WidenScalar); + for (auto Ty : {s32, p0}) + setAction({G_ICMP, 1, Ty}, Legal); + if (!ST.useSoftFloat() && ST.hasVFP2()) { setAction({G_FADD, s32}, Legal); setAction({G_FADD, s64}, Legal); diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp new file mode 100644 index 0000000000000..1b6e97c28d453 --- /dev/null +++ b/lib/Target/ARM/ARMMacroFusion.cpp @@ -0,0 +1,57 @@ +//===- ARMMacroFusion.cpp - ARM Macro Fusion ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the ARM implementation of the DAG scheduling +/// mutation to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include "ARMMacroFusion.h" +#include "ARMSubtarget.h" +#include "llvm/CodeGen/MacroFusion.h" +#include "llvm/Target/TargetInstrInfo.h" + +namespace llvm { + +/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused +/// together. Given SecondMI, when FirstMI is unspecified, then check if +/// SecondMI may be part of a fused pair at all. +static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, + const TargetSubtargetInfo &TSI, + const MachineInstr *FirstMI, + const MachineInstr &SecondMI) { + const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI); + + // Assume wildcards for unspecified instrs. + unsigned FirstOpcode = + FirstMI ? FirstMI->getOpcode() + : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END); + unsigned SecondOpcode = SecondMI.getOpcode(); + + if (ST.hasFuseAES()) + // Fuse AES crypto operations. + switch(SecondOpcode) { + // AES encode. + case ARM::AESMC : + return FirstOpcode == ARM::AESE || + FirstOpcode == ARM::INSTRUCTION_LIST_END; + // AES decode. + case ARM::AESIMC: + return FirstOpcode == ARM::AESD || + FirstOpcode == ARM::INSTRUCTION_LIST_END; + } + + return false; +} + +std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation () { + return createMacroFusionDAGMutation(shouldScheduleAdjacent); +} + +} // end namespace llvm diff --git a/lib/Target/ARM/ARMMacroFusion.h b/lib/Target/ARM/ARMMacroFusion.h new file mode 100644 index 0000000000000..1e4fc6687eae8 --- /dev/null +++ b/lib/Target/ARM/ARMMacroFusion.h @@ -0,0 +1,24 @@ +//===- ARMMacroFusion.h - ARM Macro Fusion ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file contains the ARM definition of the DAG scheduling mutation +/// to pair instructions back to back. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineScheduler.h" + +namespace llvm { + +/// Note that you have to add: +/// DAG.addMutation(createARMMacroFusionDAGMutation()); +/// to ARMPassConfig::createMachineScheduler() to have an effect. +std::unique_ptr<ScheduleDAGMutation> createARMMacroFusionDAGMutation(); + +} // llvm diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index f59b075e6dd9a..2350d0c6ef69e 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -255,6 +255,16 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; + case G_ICMP: { + LLT Ty2 = MRI.getType(MI.getOperand(2).getReg()); + (void)Ty2; + assert(Ty2.getSizeInBits() == 32 && "Unsupported size for G_ICMP"); + OperandsMapping = + getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr, + &ARM::ValueMappings[ARM::GPR3OpsIdx], + &ARM::ValueMappings[ARM::GPR3OpsIdx]}); + break; + } case G_MERGE_VALUES: { // We only support G_MERGE_VALUES for creating a double precision floating // point value out of two GPRs. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index af682dd8321cf..d890d0fa777e8 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -285,6 +285,10 @@ protected: /// HasFPAO - if true, processor does positive address offset computation faster bool HasFPAO = false; + /// HasFuseAES - if true, processor executes back to back AES instruction + /// pairs faster. + bool HasFuseAES = false; + /// If true, if conversion may decide to leave some instructions unpredicated. bool IsProfitableToUnpredicate = false; @@ -561,6 +565,10 @@ public: bool hasD16() const { return HasD16; } bool hasFullFP16() const { return HasFullFP16; } + bool hasFuseAES() const { return HasFuseAES; } + /// \brief Return true if the CPU supports any kind of instruction fusion. + bool hasFusion() const { return hasFuseAES(); } + const Triple &getTargetTriple() const { return TargetTriple; } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index c0506cfda6129..eb71e557ec917 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -17,6 +17,7 @@ #include "ARMRegisterBankInfo.h" #endif #include "ARMSubtarget.h" +#include "ARMMacroFusion.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" #include "ARMTargetTransformInfo.h" @@ -394,6 +395,9 @@ public: createMachineScheduler(MachineSchedContext *C) const override { ScheduleDAGMILive *DAG = createGenericSchedLive(C); // add DAG Mutations here. + const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); + if (ST.hasFusion()) + DAG->addMutation(createARMMacroFusionDAGMutation()); return DAG; } @@ -401,6 +405,9 @@ public: createPostMachineScheduler(MachineSchedContext *C) const override { ScheduleDAGMI *DAG = createGenericSchedPostRA(C); // add DAG Mutations here. + const ARMSubtarget &ST = C->MF->getSubtarget<ARMSubtarget>(); + if (ST.hasFusion()) + DAG->addMutation(createARMMacroFusionDAGMutation()); return DAG; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 19fba3033bb2b..891b5c60e1fd6 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -6860,6 +6860,17 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { bool ARMAsmParser::processInstruction(MCInst &Inst, const OperandVector &Operands, MCStreamer &Out) { + // Check if we have the wide qualifier, because if it's present we + // must avoid selecting a 16-bit thumb instruction. + bool HasWideQualifier = false; + for (auto &Op : Operands) { + ARMOperand &ARMOp = static_cast<ARMOperand&>(*Op); + if (ARMOp.isToken() && ARMOp.getToken() == ".w") { + HasWideQualifier = true; + break; + } + } + switch (Inst.getOpcode()) { // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction. case ARM::LDRT_POST: @@ -6939,8 +6950,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // Select the narrow version if the immediate will fit. if (Inst.getOperand(1).getImm() > 0 && Inst.getOperand(1).getImm() <= 0xff && - !(static_cast<ARMOperand &>(*Operands[2]).isToken() && - static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w")) + !HasWideQualifier) Inst.setOpcode(ARM::tLDRpci); else Inst.setOpcode(ARM::t2LDRpci); @@ -6971,10 +6981,9 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, else if (Inst.getOpcode() == ARM::t2LDRConstPool) TmpInst.setOpcode(ARM::t2LDRpci); const ARMOperand &PoolOperand = - (static_cast<ARMOperand &>(*Operands[2]).isToken() && - static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w") ? - static_cast<ARMOperand &>(*Operands[4]) : - static_cast<ARMOperand &>(*Operands[3]); + (HasWideQualifier ? + static_cast<ARMOperand &>(*Operands[4]) : + static_cast<ARMOperand &>(*Operands[3])); const MCExpr *SubExprVal = PoolOperand.getConstantPoolImm(); // If SubExprVal is a constant we may be able to use a MOV if (isa<MCConstantExpr>(SubExprVal) && @@ -8117,8 +8126,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && - !(static_cast<ARMOperand &>(*Operands[3]).isToken() && - static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) { + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8152,7 +8160,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg()) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && - inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr)) + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsr) && + !HasWideQualifier) isNarrow = true; MCInst TmpInst; unsigned newOpc; @@ -8186,7 +8195,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, bool isNarrow = false; if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && - inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi)) + inITBlock() == (Inst.getOpcode() == ARM::t2MOVsi) && + !HasWideQualifier) isNarrow = true; MCInst TmpInst; unsigned newOpc; @@ -8415,10 +8425,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, !isARMLowRegister(Inst.getOperand(0).getReg()) || (Inst.getOperand(2).isImm() && (unsigned)Inst.getOperand(2).getImm() > 255) || - ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != 0)) || - (static_cast<ARMOperand &>(*Operands[3]).isToken() && - static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) + Inst.getOperand(5).getReg() != (inITBlock() ? 0 : ARM::CPSR) || + HasWideQualifier) break; MCInst TmpInst; TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ? @@ -8447,8 +8455,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, } if (!Transform || Inst.getOperand(5).getReg() != 0 || - (static_cast<ARMOperand &>(*Operands[3]).isToken() && - static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) + HasWideQualifier) break; MCInst TmpInst; TmpInst.setOpcode(ARM::tADDhirr); @@ -8568,11 +8575,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && (Inst.getOperand(1).isImm() && (unsigned)Inst.getOperand(1).getImm() <= 255) && - ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && - Inst.getOperand(4).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(4).getReg() == 0)) && - (!static_cast<ARMOperand &>(*Operands[2]).isToken() || - static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { + Inst.getOperand(4).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { // The operands aren't in the same order for tMOVi8... MCInst TmpInst; TmpInst.setOpcode(ARM::tMOVi8); @@ -8593,8 +8597,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR && - (!static_cast<ARMOperand &>(*Operands[2]).isToken() || - static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { + !HasWideQualifier) { // The operands aren't the same for tMOV[S]r... (no cc_out) MCInst TmpInst; TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr); @@ -8616,8 +8619,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == 0 && - (!static_cast<ARMOperand &>(*Operands[2]).isToken() || - static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("Illegal opcode!"); @@ -8716,11 +8718,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, if ((isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && - ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand &>(*Operands[3]).isToken() || - !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( - ".w"))) { + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8756,11 +8755,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(2).getReg())) && (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() || Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) && - ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand &>(*Operands[3]).isToken() || - !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( - ".w"))) { + Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && + !HasWideQualifier) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 3cde43967568b..cf6827fd6ca19 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -49,6 +49,7 @@ add_llvm_target(ARMCodeGen ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp + ARMMacroFusion.cpp ARMRegisterInfo.cpp ARMOptimizeBarriersPass.cpp ARMSelectionDAGInfo.cpp diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 716492ea25662..81760f03940ad 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -358,11 +358,27 @@ static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf, return Value; } -unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, +unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm, + const MCFixup &Fixup, + const MCValue &Target, uint64_t Value, bool IsPCRel, MCContext &Ctx, bool IsLittleEndian, bool IsResolved) const { unsigned Kind = Fixup.getKind(); + + // MachO tries to make .o files that look vaguely pre-linked, so for MOVW/MOVT + // and .word relocations they put the Thumb bit into the addend if possible. + // Other relocation types don't want this bit though (branches couldn't encode + // it if it *was* present, and no other relocations exist) and it can + // interfere with checking valid expressions. + if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->hasSubsectionsViaSymbols() && Asm.isThumbFunc(&A->getSymbol()) && + (Kind == FK_Data_4 || Kind == ARM::fixup_arm_movw_lo16 || + Kind == ARM::fixup_arm_movt_hi16 || Kind == ARM::fixup_t2_movw_lo16 || + Kind == ARM::fixup_t2_movt_hi16)) + Value |= 1; + } + switch (Kind) { default: Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type"); @@ -505,6 +521,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, return swapHalfWords(out, IsLittleEndian); } case ARM::fixup_arm_thumb_bl: { + // FIXME: We get both thumb1 and thumb2 in here, so we can only check for + // the less strict thumb2 value. + if (!isInt<26>(Value - 4)) { + Ctx.reportError(Fixup.getLoc(), "Relocation out of range"); + return 0; + } + // The value doesn't encode the low bit (always zero) and is offset by // four. The 32-bit immediate value is encoded as // imm32 = SignExtend(S:I1:I2:imm10:imm11:0) @@ -716,29 +739,11 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value, } void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, - const MCAsmLayout &Layout, const MCFixup &Fixup, - const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) { + const MCValue &Target, bool &IsResolved) { const MCSymbolRefExpr *A = Target.getSymA(); const MCSymbol *Sym = A ? &A->getSymbol() : nullptr; const unsigned FixupKind = Fixup.getKind() ; - // MachO (the only user of "Value") tries to make .o files that look vaguely - // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit - // into the addend if possible. Other relocation types don't want this bit - // though (branches couldn't encode it if it *was* present, and no other - // relocations exist) and it can interfere with checking valid expressions. - if (FixupKind == FK_Data_4 || - FixupKind == ARM::fixup_arm_movw_lo16 || - FixupKind == ARM::fixup_arm_movt_hi16 || - FixupKind == ARM::fixup_t2_movw_lo16 || - FixupKind == ARM::fixup_t2_movt_hi16) { - if (Sym) { - if (Asm.isThumbFunc(Sym)) - Value |= 1; - } - } if (IsResolved && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) { assert(Sym && "How did we resolve this?"); @@ -747,7 +752,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, // If the symbol is out of range, produce a relocation and hope the // linker can handle it. GNU AS produces an error in this case. - if (Sym->isExternal() || Value >= 0x400004) + if (Sym->isExternal()) IsResolved = false; } // Create relocations for unconditional branches to function symbols with @@ -759,6 +764,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm, IsResolved = false; if (!Asm.isThumbFunc(Sym) && (FixupKind == ARM::fixup_arm_thumb_br || FixupKind == ARM::fixup_arm_thumb_bl || + FixupKind == ARM::fixup_t2_condbranch || FixupKind == ARM::fixup_t2_uncondbranch)) IsResolved = false; } @@ -875,22 +881,25 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) { } } -void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, - unsigned DataSize, uint64_t Value, bool IsPCRel, - MCContext &Ctx) const { +void ARMAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, + MutableArrayRef<char> Data, uint64_t Value, + bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); - Value = adjustFixupValue(Fixup, Value, IsPCRel, Ctx, IsLittleEndian, true); + MCContext &Ctx = Asm.getContext(); + Value = adjustFixupValue(Asm, Fixup, Target, Value, IsPCRel, Ctx, + IsLittleEndian, true); if (!Value) return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); - assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + assert(Offset + NumBytes <= Data.size() && "Invalid fixup offset!"); // Used to point to big endian bytes. unsigned FullSizeBytes; if (!IsLittleEndian) { FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind()); - assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!"); + assert((Offset + FullSizeBytes) <= Data.size() && "Invalid fixup size!"); assert(NumBytes <= FullSizeBytes && "Invalid fixup size!"); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h index 2ddedb5d61059..6a0ba2ed41c1a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -40,17 +40,17 @@ public: /// processFixupValue - Target hook to process the literal value of a fixup /// if necessary. - void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; + void processFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, bool &IsResolved) override; - unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, bool IsPCRel, + unsigned adjustFixupValue(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, uint64_t Value, bool IsPCRel, MCContext &Ctx, bool IsLittleEndian, bool IsResolved) const; - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel, MCContext &Ctx) const override; + void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, + const MCValue &Target, MutableArrayRef<char> Data, + uint64_t Value, bool IsPCRel) const override; unsigned getRelaxedOpcode(unsigned Op) const; diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index 00505a103e00f..f74fb2e20b5a3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -33,8 +33,8 @@ public: ~ARMWinCOFFObjectWriter() override = default; - unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsCrossSection, + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const override; bool recordRelocation(const MCFixup &) const override; @@ -42,7 +42,8 @@ public: } // end anonymous namespace -unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target, +unsigned ARMWinCOFFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, const MCFixup &Fixup, bool IsCrossSection, const MCAsmBackend &MAB) const { |
