Diffstat (limited to 'lib/Target/ARM')
91 files changed, 4821 insertions, 4898 deletions
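Most of the churn in this patch is a mechanical API migration: instruction, register, and frame-lowering info are now reached through the subtarget rather than the TargetMachine, and the DataLayout through TM.getSubtargetImpl(). A minimal sketch of the before/after pattern, using a hypothetical pass (MyPass and its members are illustrative; the accessors are the ones the hunks below adopt):

    bool MyPass::runOnMachineFunction(MachineFunction &Fn) {
      // Before: Fn.getTarget().getInstrInfo() / Fn.getTarget().getRegisterInfo()
      const TargetSubtargetInfo &ST = Fn.getSubtarget();
      TII = static_cast<const ARMBaseInstrInfo *>(ST.getInstrInfo());
      TRI = ST.getRegisterInfo();
      MRI = &Fn.getRegInfo();
      return false;
    }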
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 92eaf9e1c9b3..387f1f64e836 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -34,6 +34,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <map> #include <set> using namespace llvm; @@ -676,8 +678,8 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { } bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); - TRI = Fn.getTarget().getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); + TRI = Fn.getSubtarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); bool Modified = false; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 55df29c1499a..02db53a27455 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef TARGET_ARM_H -#define TARGET_ARM_H +#ifndef LLVM_LIB_TARGET_ARM_ARM_H +#define LLVM_LIB_TARGET_ARM_ARM_H #include "llvm/Support/CodeGen.h" @@ -23,7 +23,6 @@ class ARMAsmPrinter; class ARMBaseTargetMachine; class FunctionPass; class ImmutablePass; -class JITCodeEmitter; class MachineInstr; class MCInst; class TargetLowering; @@ -31,10 +30,6 @@ class TargetMachine; FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); - -FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, - JITCodeEmitter &JCE); - FunctionPass *createA15SDOptimizerPass(); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMExpandPseudoPass(); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 7916ccc180c8..244014b5c29f 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -228,6 +228,15 @@ def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", FeatureAvoidPartialCPSR, FeatureTrustZone, FeatureVirtualization]>; +def ProcA17 : SubtargetFeature<"a17", "ARMProcFamily", "CortexA17", + "Cortex-A17 ARM processors", + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureVFP4, + FeatureHWDiv, FeatureHWDivARM, + FeatureAvoidPartialCPSR, + FeatureVirtualization, + FeatureTrustZone]>; + def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", "Cortex-A53 ARM processors", [FeatureHWDiv, FeatureHWDivARM, @@ -261,13 +270,6 @@ def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", FeatureHWDivARM]>; -def FeatureAPCS : SubtargetFeature<"apcs", "TargetABI", "ARM_ABI_APCS", - "Use the APCS ABI">; - -def FeatureAAPCS : SubtargetFeature<"aapcs", "TargetABI", "ARM_ABI_AAPCS", - "Use the AAPCS ABI">; - - class ProcNoItin<string Name, list<SubtargetFeature> Features> : Processor<Name, NoItineraries, Features>; @@ -347,12 +349,8 @@ def : ProcessorModel<"cortex-a8", CortexA8Model, FeatureAClass]>; def : ProcessorModel<"cortex-a9", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS, + FeatureDSPThumb2, FeatureHasRAS, FeatureMP, FeatureAClass]>; -def : ProcessorModel<"cortex-a9-mp", CortexA9Model, - [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureMP, - FeatureHasRAS, FeatureAClass]>; // FIXME: A12 has currently the same Schedule model as A9 def : ProcessorModel<"cortex-a12", CortexA9Model, @@ -366,6 +364,12 @@ def : 
ProcessorModel<"cortex-a15", CortexA9Model, FeatureDSPThumb2, FeatureHasRAS, FeatureAClass]>; +// FIXME: A17 has currently the same Schedule model as A9 +def : ProcessorModel<"cortex-a17", CortexA9Model, + [ProcA17, HasV7Ops, FeatureNEON, FeatureDB, + FeatureDSPThumb2, FeatureMP, + FeatureHasRAS, FeatureAClass]>; + // FIXME: krait has currently the same Schedule model as A9 def : ProcessorModel<"krait", CortexA9Model, [ProcKrait, HasV7Ops, @@ -392,6 +396,12 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureT2XtPk, FeatureVFP4, FeatureVFPOnlySP, FeatureD16, FeatureMClass]>; +def : ProcNoItin<"cortex-m7", [HasV7Ops, + FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv, FeatureDSPThumb2, + FeatureT2XtPk, FeatureFPARMv8, + FeatureD16, FeatureMClass]>; + // Swift uArch Processors. def : ProcessorModel<"swift", SwiftModel, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 28d2610c3903..b17d4aa19f70 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -76,7 +76,8 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() { } void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { - uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); + uint64_t Size = + TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(CV->getType()); assert(Size && "C++ constructor pointer had zero size!"); const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); @@ -119,6 +120,23 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit the rest of the function body. EmitFunctionBody(); + // If we need V4T thumb mode Register Indirect Jump pads, emit them. + // These are created per function, rather than per TU, since it's + // relatively easy to exceed the thumb branch range within a TU. + if (! ThumbIndirectPads.empty()) { + OutStreamer.EmitAssemblerFlag(MCAF_Code16); + EmitAlignment(1); + for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) { + OutStreamer.EmitLabel(ThumbIndirectPads[i].second); + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX) + .addReg(ThumbIndirectPads[i].first) + // Add predicate operands. + .addImm(ARMCC::AL) + .addReg(0)); + } + ThumbIndirectPads.clear(); + } + // We didn't modify anything. 
return false; } @@ -136,7 +154,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, assert(!MO.getSubReg() && "Subregs should be eliminated!"); if(ARM::GPRPairRegClass.contains(Reg)) { const MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); Reg = TRI->getSubReg(Reg, ARM::gsub_0); } O << ARMInstPrinter::getRegisterName(Reg); @@ -182,7 +200,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, MCSymbol *ARMAsmPrinter:: GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << uid << '_' << uid2; @@ -191,7 +209,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel() const { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); SmallString<60> Name; raw_svector_ostream(Name) << DL->getPrivateGlobalPrefix() << "SJLJEH" << getFunctionNumber(); @@ -229,7 +247,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, case 'y': // Print a VFP single precision register as indexed double. if (MI->getOperand(OpNum).isReg()) { unsigned Reg = MI->getOperand(OpNum).getReg(); - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); // Find the 'd' register that has this 's' register as a sub-register, // and determine the lane number. for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) { @@ -261,7 +279,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // inline asm statement. O << "{"; if (ARM::GPRPairRegClass.contains(RegBegin)) { - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); O << ARMInstPrinter::getRegisterName(Reg0) << ", "; RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); @@ -317,7 +335,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); if (!MO.isReg()) return true; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ? ARM::gsub_0 : ARM::gsub_1); O << ARMInstPrinter::getRegisterName(Reg); @@ -343,7 +361,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned Reg = MI->getOperand(OpNum).getReg(); if (!ARM::QPRRegClass.contains(Reg)) return true; - const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); unsigned SubReg = TRI->getSubReg(Reg, ExtraCode[0] == 'e' ? 
ARM::dsub_0 : ARM::dsub_1); O << ARMInstPrinter::getRegisterName(SubReg); @@ -358,7 +376,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (!MO.isReg()) return true; const MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); unsigned Reg = MO.getReg(); if(!ARM::GPRPairRegClass.contains(Reg)) return false; @@ -478,6 +496,9 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // Emit ARM Build Attributes if (Subtarget->isTargetELF()) emitAttributes(); + + if (!M.getModuleInlineAsm().empty() && Subtarget->isThumb()) + OutStreamer.EmitAssemblerFlag(MCAF_Code16); } static void @@ -558,7 +579,7 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); if (!Stubs.empty()) { OutStreamer.SwitchSection(TLOFELF.getDataRelSection()); - const DataLayout *TD = TM.getDataLayout(); + const DataLayout *TD = TM.getSubtargetImpl()->getDataLayout(); for (auto &stub: Stubs) { OutStreamer.EmitLabel(stub.first); @@ -608,6 +629,8 @@ void ARMAsmPrinter::emitAttributes() { MCTargetStreamer &TS = *OutStreamer.getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); + ATS.emitTextAttribute(ARMBuildAttrs::conformance, "2.09"); + ATS.switchVendor("aeabi"); std::string CPUString = Subtarget->getCPUString(); @@ -663,7 +686,9 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::AllowNeonARMv8); } else { if (Subtarget->hasFPARMv8()) - ATS.emitFPU(ARM::FP_ARMV8); + // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one + // FPU, but there are two different names for it depending on the CPU. + ATS.emitFPU(Subtarget->hasD16() ? ARM::FPV5_D16 : ARM::FP_ARMV8); else if (Subtarget->hasVFP4()) ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4); else if (Subtarget->hasVFP3()) @@ -688,11 +713,44 @@ void ARMAsmPrinter::emitAttributes() { // Signal various FP modes. if (!TM.Options.UnsafeFPMath) { - ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed); + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, + ARMBuildAttrs::IEEEDenormals); ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed); + + // If the user has permitted this code to choose the IEEE 754 + // rounding at run-time, emit the rounding attribute. + if (TM.Options.HonorSignDependentRoundingFPMathOption) + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, + ARMBuildAttrs::Allowed); + } else { + if (!Subtarget->hasVFP2()) { + // When the target doesn't have an FPU (by design or + // intention), the assumptions made on the software support + // mirror that of the equivalent hardware support *if it + // existed*. For v7 and better we indicate that denormals are + // flushed preserving sign, and for V6 we indicate that + // denormals are flushed to positive zero. + if (Subtarget->hasV7Ops()) + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, + ARMBuildAttrs::PreserveFPSign); + } else if (Subtarget->hasVFP3()) { + // In VFPv4, VFPv4U, VFPv3, or VFPv3U, it is preserved. That is, + // the sign bit of the zero matches the sign bit of the input or + // result that is being flushed to zero. 
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, + ARMBuildAttrs::PreserveFPSign); + } + // For VFPv2 implementations it is implementation defined as + // to whether denormals are flushed to positive zero or to + // whatever the sign of zero is (ARM v7AR ARM 2.7.5). Historically + // LLVM has chosen to flush this to positive zero (most likely for + // GCC compatibility), so that's the chosen value here (the + // absence of its emission implies zero). } + // TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath is the + // equivalent of GCC's -ffinite-math-only flag. if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath) ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::Allowed); @@ -700,6 +758,13 @@ void ARMAsmPrinter::emitAttributes() { ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, ARMBuildAttrs::AllowIEE754); + if (Subtarget->allowsUnalignedMem()) + ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Allowed); + else + ATS.emitAttribute(ARMBuildAttrs::CPU_unaligned_access, + ARMBuildAttrs::Not_Allowed); + // FIXME: add more flags to ARMBuildAttributes.h // 8-bytes alignment stuff. ATS.emitAttribute(ARMBuildAttrs::ABI_align_needed, 1); @@ -719,6 +784,13 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasFP16()) ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); + // FIXME: To support emitting this build attribute as GCC does, the + // -mfp16-format option and associated plumbing must be + // supported. For now the __fp16 type is exposed by default, so this + // attribute should be emitted with value 1. + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_16bit_format, + ARMBuildAttrs::FP16FormatIEEE); + if (Subtarget->hasMPExtension()) ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); @@ -735,7 +807,7 @@ void ARMAsmPrinter::emitAttributes() { if (const Module *SourceModule = MMI->getModule()) { // ABI_PCS_wchar_t to indicate wchar_t width // FIXME: There is no way to emit value 0 (wchar_t prohibited). - if (auto WCharWidthValue = cast_or_null<ConstantInt>( + if (auto WCharWidthValue = mdconst::extract_or_null<ConstantInt>( SourceModule->getModuleFlag("wchar_size"))) { int WCharWidth = WCharWidthValue->getZExtValue(); assert((WCharWidth == 2 || WCharWidth == 4) && @@ -746,7 +818,7 @@ void ARMAsmPrinter::emitAttributes() { // ABI_enum_size to indicate enum width // FIXME: There is no way to emit value 0 (enums prohibited) or value 3 // (all enums contain a value needing 32 bits to encode). 
- if (auto EnumWidthValue = cast_or_null<ConstantInt>( + if (auto EnumWidthValue = mdconst::extract_or_null<ConstantInt>( SourceModule->getModuleFlag("min_enum_size"))) { int EnumWidth = EnumWidthValue->getZExtValue(); assert((EnumWidth == 1 || EnumWidth == 4) && @@ -757,6 +829,17 @@ void ARMAsmPrinter::emitAttributes() { } } + // TODO: We currently only support either reserving the register, or treating + // it as another callee-saved register, but not as SB or a TLS pointer; It + // would instead be nicer to push this from the frontend as metadata, as we do + // for the wchar and enum size tags + if (Subtarget->isR9Reserved()) + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, + ARMBuildAttrs::R9Reserved); + else + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, + ARMBuildAttrs::R9IsGPR); + if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZVirtualization); @@ -834,8 +917,9 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, void ARMAsmPrinter:: EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { - const DataLayout *DL = TM.getDataLayout(); - int Size = TM.getDataLayout()->getTypeAllocSize(MCPV->getType()); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); + int Size = + TM.getSubtargetImpl()->getDataLayout()->getTypeAllocSize(MCPV->getType()); ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV); @@ -1013,7 +1097,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { MCTargetStreamer &TS = *OutStreamer.getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); const MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -1151,7 +1235,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { #include "ARMGenMCPseudoLowering.inc" void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { - const DataLayout *DL = TM.getDataLayout(); + const DataLayout *DL = TM.getSubtargetImpl()->getDataLayout(); // If we just ended a constant pool, mark it as such. if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { @@ -1226,18 +1310,34 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case ARM::tBX_CALL: { - EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tMOVr) - .addReg(ARM::LR) - .addReg(ARM::PC) - // Add predicate operands. - .addImm(ARMCC::AL) - .addReg(0)); + if (Subtarget->hasV5TOps()) + llvm_unreachable("Expected BLX to be selected for v5t+"); + + // On ARM v4t, when doing a call from thumb mode, we need to ensure + // that the saved lr has its LSB set correctly (the arch doesn't + // have blx). + // So here we generate a bl to a small jump pad that does bx rN. + // The jump pads are emitted after the function body. + + unsigned TReg = MI->getOperand(0).getReg(); + MCSymbol *TRegSym = nullptr; + for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) { + if (ThumbIndirectPads[i].first == TReg) { + TRegSym = ThumbIndirectPads[i].second; + break; + } + } - EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBX) - .addReg(MI->getOperand(0).getReg()) - // Add predicate operands. 
- .addImm(ARMCC::AL) - .addReg(0)); + if (!TRegSym) { + TRegSym = OutContext.CreateTempSymbol(); + ThumbIndirectPads.push_back(std::make_pair(TReg, TRegSym)); + } + + // Create a link-saving branch to the Reg Indirect Jump Pad. + EmitToStreamer(OutStreamer, MCInstBuilder(ARM::tBL) + // Predicate comes first here. + .addImm(ARMCC::AL).addReg(0) + .addExpr(MCSymbolRefExpr::Create(TRegSym, OutContext))); return; } case ARM::BMOVPCRX_CALL: { @@ -1567,6 +1667,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitJumpTable(MI); return; } + case ARM::SPACE: + OutStreamer.EmitZeros(MI->getOperand(1).getImm()); + return; case ARM::TRAP: { // Non-Darwin binutils don't yet support the "trap" mnemonic. // FIXME: Remove this special case when they do. diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 7c103c6763f2..a6214911a783 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMASMPRINTER_H -#define ARMASMPRINTER_H +#ifndef LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H +#define LLVM_LIB_TARGET_ARM_ARMASMPRINTER_H #include "ARMSubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -20,6 +20,7 @@ class ARMFunctionInfo; class MCOperand; class MachineConstantPool; class MachineOperand; +class MCSymbol; namespace ARM { enum DW_ISA { @@ -45,6 +46,11 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { /// InConstantPool - Maintain state when emitting a sequence of constant /// pool entries so we can properly mark them as data regions. bool InConstantPool; + + /// ThumbIndirectPads - These maintain a per-function list of jump pad + /// labels used for ARMv4t thumb code to make register indirect calls. 
+ SmallVector<std::pair<unsigned, MCSymbol*>, 4> ThumbIndirectPads; + public: explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr), diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 0288db91dcbf..e0397cce962d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -108,7 +108,7 @@ ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const { if (usePreRAHazardRecognizer()) { const InstrItineraryData *II = - &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); + static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); } return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); @@ -518,6 +518,42 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, return Found; } +static bool isCPSRDefined(const MachineInstr *MI) { + for (const auto &MO : MI->operands()) + if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef()) + return true; + return false; +} + +static bool isEligibleForITBlock(const MachineInstr *MI) { + switch (MI->getOpcode()) { + default: return true; + case ARM::tADC: // ADC (register) T1 + case ARM::tADDi3: // ADD (immediate) T1 + case ARM::tADDi8: // ADD (immediate) T2 + case ARM::tADDrr: // ADD (register) T1 + case ARM::tAND: // AND (register) T1 + case ARM::tASRri: // ASR (immediate) T1 + case ARM::tASRrr: // ASR (register) T1 + case ARM::tBIC: // BIC (register) T1 + case ARM::tEOR: // EOR (register) T1 + case ARM::tLSLri: // LSL (immediate) T1 + case ARM::tLSLrr: // LSL (register) T1 + case ARM::tLSRri: // LSR (immediate) T1 + case ARM::tLSRrr: // LSR (register) T1 + case ARM::tMUL: // MUL T1 + case ARM::tMVN: // MVN (register) T1 + case ARM::tORR: // ORR (register) T1 + case ARM::tROR: // ROR (register) T1 + case ARM::tRSB: // RSB (immediate) T1 + case ARM::tSBC: // SBC (register) T1 + case ARM::tSUBi3: // SUB (immediate) T1 + case ARM::tSUBi8: // SUB (immediate) T2 + case ARM::tSUBrr: // SUB (register) T1 + return !isCPSRDefined(MI); + } +} + /// isPredicable - Return true if the specified instruction can be predicated. /// By default, this returns true for every instruction with a /// PredicateOperand. @@ -525,6 +561,9 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { if (!MI->isPredicable()) return false; + if (!isEligibleForITBlock(MI)) + return false; + ARMFunctionInfo *AFI = MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); @@ -555,16 +594,6 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) { } } -/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing. -LLVM_ATTRIBUTE_NOINLINE -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI); -static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, - unsigned JTI) { - assert(JTI < JT.size()); - return JT[JTI].MBBs.size(); -} - /// GetInstSize - Return the size of the specified MachineInstr. /// unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { @@ -637,7 +666,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // bytes, we can use 16-bit entries instead. Then there won't be an // alignment issue. unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 
2 : 4; - unsigned NumEntries = getNumJTEntries(JT, JTI); + unsigned NumEntries = JT[JTI].MBBs.size(); if (Opc == ARM::t2TBB_JT && (NumEntries & 1)) // Make sure the instruction that follows TBB is 2-byte aligned. // FIXME: Constant island pass should insert an "ALIGN" instruction @@ -645,6 +674,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { ++NumEntries; return NumEntries * EntrySize + InstSize; } + case ARM::SPACE: + return MI->getOperand(1).getImm(); } } @@ -659,6 +690,49 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const { return Size; } +void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, bool KillSrc, + const ARMSubtarget &Subtarget) const { + unsigned Opc = Subtarget.isThumb() + ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR) + : ARM::MRS; + + MachineInstrBuilder MIB = + BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg); + + // There is only 1 A/R class MRS instruction, and it always refers to + // APSR. However, there are lots of other possibilities on M-class cores. + if (Subtarget.isMClass()) + MIB.addImm(0x800); + + AddDefaultPred(MIB); + + MIB.addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc)); +} + +void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned SrcReg, bool KillSrc, + const ARMSubtarget &Subtarget) const { + unsigned Opc = Subtarget.isThumb() + ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR) + : ARM::MSR; + + MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc)); + + if (Subtarget.isMClass()) + MIB.addImm(0x800); + else + MIB.addImm(8); + + MIB.addReg(SrcReg, getKillRegState(KillSrc)); + + AddDefaultPred(MIB); + + MIB.addReg(ARM::CPSR, RegState::Implicit | RegState::Define); +} + void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, @@ -682,7 +756,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, Opc = ARM::VMOVRS; else if (SPRDest && GPRSrc) Opc = ARM::VMOVSR; - else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) + else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && !Subtarget.isFPOnlySP()) Opc = ARM::VMOVD; else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) Opc = ARM::VORRq; @@ -742,6 +816,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, BeginIdx = ARM::dsub_0; SubRegs = 4; Spacing = 2; + } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.isFPOnlySP()) { + Opc = ARM::VMOVS; + BeginIdx = ARM::ssub_0; + SubRegs = 2; + } else if (SrcReg == ARM::CPSR) { + copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget); + return; + } else if (DestReg == ARM::CPSR) { + copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget); + return; } assert(Opc && "Impossible reg-to-reg copy"); @@ -1174,12 +1258,26 @@ unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex); } -bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ +bool +ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + MachineFunction &MF = *MI->getParent()->getParent(); + Reloc::Model RM = MF.getTarget().getRelocationModel(); + + if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD) { + assert(getSubtarget().getTargetTriple().getObjectFormat() == + Triple::MachO && + "LOAD_STACK_GUARD currently supported only for MachO."); + 
expandLoadStackGuard(MI, RM); + MI->getParent()->erase(MI); + return true; + } + // This hook gets to expand COPY instructions before they become // copyPhysReg() calls. Look for VMOVS instructions that can legally be // widened to VMOVD. We prefer the VMOVD when possible because it may be // changed into a VORR that can go down the NEON pipeline. - if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15()) + if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15() || + Subtarget.isFPOnlySP()) return false; // Look for a copy between even S-registers. That is where we keep floats @@ -1738,8 +1836,10 @@ bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI, return false; } -MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, - bool PreferFalse) const { +MachineInstr * +ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &SeenMIs, + bool PreferFalse) const { assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); @@ -1787,6 +1887,10 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); + // Update SeenMIs set: register newly created MI and erase removed DefMI. + SeenMIs.insert(NewMI); + SeenMIs.erase(DefMI); + // The caller will erase MI, but not DefMI. DefMI->eraseFromParent(); return NewMI; @@ -2832,7 +2936,7 @@ static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData, // FIXME: The current MachineInstr design does not support relying on machine // mem operands to determine the width of a memory access. Instead, we expect // the target to provide this information based on the instruction opcode and -// operands. However, using MachineMemOperand is a the best solution now for +// operands. However, using MachineMemOperand is the best solution now for // two reasons: // // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI @@ -3933,6 +4037,38 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI, return true; } +// LoadStackGuard has so far only been implemented for MachO. Different code +// sequence is needed for other targets. 
+void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI, + unsigned LoadImmOpc, + unsigned LoadOpc, + Reloc::Model RM) const { + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg = MI->getOperand(0).getReg(); + const GlobalValue *GV = + cast<GlobalValue>((*MI->memoperands_begin())->getValue()); + MachineInstrBuilder MIB; + + BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg) + .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); + + if (Subtarget.GVIsIndirectSymbol(GV, RM)) { + MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + MachineMemOperand *MMO = MBB.getParent()-> + getMachineMemOperand(MachinePointerInfo::getGOT(), Flag, 4, 4); + MIB.addMemOperand(MMO); + AddDefaultPred(MIB); + } + + MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + AddDefaultPred(MIB); +} + bool ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, unsigned &AddSubOpc, @@ -4361,29 +4497,6 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, MI->addRegisterKilled(DReg, TRI, true); } -void ARMBaseInstrInfo::getUnconditionalBranch( - MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { - if (Subtarget.isThumb()) - Branch.setOpcode(ARM::tB); - else if (Subtarget.isThumb2()) - Branch.setOpcode(ARM::t2B); - else - Branch.setOpcode(ARM::Bcc); - - Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); - Branch.addOperand(MCOperand::CreateImm(ARMCC::AL)); - Branch.addOperand(MCOperand::CreateReg(0)); -} - -void ARMBaseInstrInfo::getTrap(MCInst &MI) const { - if (Subtarget.isThumb()) - MI.setOpcode(ARM::tTRAP); - else if (Subtarget.useNaClTrap()) - MI.setOpcode(ARM::TRAPNaCl); - else - MI.setOpcode(ARM::TRAP); -} - bool ARMBaseInstrInfo::hasNOP() const { return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; } @@ -4401,3 +4514,72 @@ bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { return false; } + +bool ARMBaseInstrInfo::getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isRegSequenceLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VMOVDRR: + // dX = VMOVDRR rY, rZ + // is the same as: + // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1 + // Populate the InputRegs accordingly. 
+ // rY + const MachineOperand *MOReg = &MI.getOperand(1); + InputRegs.push_back( + RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0)); + // rZ + MOReg = &MI.getOperand(2); + InputRegs.push_back( + RegSubRegPairAndIdx(MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1)); + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} + +bool ARMBaseInstrInfo::getExtractSubregLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isExtractSubregLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VMOVRRD: + // rX, rY = VMOVRRD dZ + // is the same as: + // rX = EXTRACT_SUBREG dZ, ssub_0 + // rY = EXTRACT_SUBREG dZ, ssub_1 + const MachineOperand &MOReg = MI.getOperand(2); + InputReg.Reg = MOReg.getReg(); + InputReg.SubReg = MOReg.getSubReg(); + InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1; + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} + +bool ARMBaseInstrInfo::getInsertSubregLikeInputs( + const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const { + assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index"); + assert(MI.isInsertSubregLike() && "Invalid kind of instruction"); + + switch (MI.getOpcode()) { + case ARM::VSETLNi32: + // dX = VSETLNi32 dY, rZ, imm + const MachineOperand &MOBaseReg = MI.getOperand(1); + const MachineOperand &MOInsertedReg = MI.getOperand(2); + const MachineOperand &MOIndex = MI.getOperand(3); + BaseReg.Reg = MOBaseReg.getReg(); + BaseReg.SubReg = MOBaseReg.getSubReg(); + + InsertedReg.Reg = MOInsertedReg.getReg(); + InsertedReg.SubReg = MOInsertedReg.getSubReg(); + InsertedReg.SubIdx = MOIndex.getImm() == 0 ? ARM::ssub_0 : ARM::ssub_1; + return true; + } + llvm_unreachable("Target dependent opcode missing"); +} diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b8d675806b25..ecbcf5c0f96a 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -11,13 +11,14 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMBASEINSTRUCTIONINFO_H -#define ARMBASEINSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H +#define LLVM_LIB_TARGET_ARM_ARMBASEINSTRINFO_H #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetInstrInfo.h" #define GET_INSTRINFO_HEADER @@ -34,6 +35,57 @@ protected: // Can be only subclassed. explicit ARMBaseInstrInfo(const ARMSubtarget &STI); + void expandLoadStackGuardBase(MachineBasicBlock::iterator MI, + unsigned LoadImmOpc, unsigned LoadOpc, + Reloc::Model RM) const; + + /// Build the equivalent inputs of a REG_SEQUENCE for the given \p MI + /// and \p DefIdx. + /// \p [out] InputRegs of the equivalent REG_SEQUENCE. Each element of + /// the list is modeled as <Reg:SubReg, SubIdx>. + /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce + /// two elements: + /// - vreg1:sub1, sub0 + /// - vreg2<:0>, sub1 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isRegSequenceLike(). 
+ bool getRegSequenceLikeInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const override; + + /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. + /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce: + /// - vreg1:sub1, sub0 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isExtractSubregLike(). + bool getExtractSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const override; + + /// Build the equivalent inputs of a INSERT_SUBREG for the given \p MI + /// and \p DefIdx. + /// \p [out] BaseReg and \p [out] InsertedReg contain + /// the equivalent inputs of INSERT_SUBREG. + /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce: + /// - BaseReg: vreg0:sub0 + /// - InsertedReg: vreg1:sub1, sub3 + /// + /// \returns true if it is possible to build such an input sequence + /// with the pair \p MI, \p DefIdx. False otherwise. + /// + /// \pre MI.isInsertSubregLike(). + bool + getInsertSubregLikeInputs(const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, + RegSubRegPairAndIdx &InsertedReg) const override; + public: // Return whether the target has an explicit NOP encoding. bool hasNOP() const; @@ -104,6 +156,13 @@ public: unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, int &FrameIndex) const override; + void copyToCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned SrcReg, bool KillSrc, + const ARMSubtarget &Subtarget) const; + void copyFromCPSR(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, bool KillSrc, + const ARMSubtarget &Subtarget) const; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; @@ -202,7 +261,9 @@ public: unsigned &TrueOp, unsigned &FalseOp, bool &Optimizable) const override; - MachineInstr *optimizeSelect(MachineInstr *MI, bool) const override; + MachineInstr *optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &SeenMIs, + bool) const override; /// FoldImmediate - 'Reg' is known to be defined by a move immediate /// instruction, try to fold the immediate into the use instruction. @@ -230,12 +291,6 @@ public: void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned, const TargetRegisterInfo *TRI) const override; - void - getUnconditionalBranch(MCInst &Branch, - const MCSymbolRefExpr *BranchTarget) const override; - - void getTrap(MCInst &MI) const override; - /// Get the number of addresses by LDM or VLDM or zero for unknown. unsigned getNumLDMAddresses(const MachineInstr *MI) const; @@ -286,6 +341,9 @@ private: bool verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const override; + virtual void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const = 0; + private: /// Modeling special VFP / NEON fp MLA / MLS hazards. 
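The ThumbIndirectPads machinery added to ARMAsmPrinter above is the subtle part of this patch: ARMv4T Thumb has no BLX, so an indirect call must bl to a small per-register pad that does bx rN, which keeps the LSB of the saved LR set for a correct Thumb-mode return. A condensed sketch of the dedup logic from the tBX_CALL hunk, with the emitted shape shown in comments (the pad label is hypothetical, not verbatim compiler output):

    // Call site:          bl  .LtmpPad      @ tBL fills LR with LSB = 1
    // After the body:     .code 16 / .align 1
    //                     .LtmpPad: bx r4   @ tBX to the callee
    MCSymbol *TRegSym = nullptr;
    for (auto &Pad : ThumbIndirectPads)      // reuse an existing pad for TReg
      if (Pad.first == TReg) { TRegSym = Pad.second; break; }
    if (!TRegSym) {                          // otherwise create one lazily
      TRegSym = OutContext.CreateTempSymbol();
      ThumbIndirectPads.push_back(std::make_pair(TReg, TRegSym));
    }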
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 32b5f4aa2942..8744f1c62217 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -60,9 +60,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) const MCPhysReg* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList - : CSR_AAPCS_SaveList; + const MCPhysReg *RegList = + STI.isTargetDarwin() ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; if (!MF) return RegList; @@ -95,8 +94,7 @@ ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; + return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } const uint32_t* @@ -117,13 +115,13 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { if (CC == CallingConv::GHC) // This is academic becase all GHC calls are (supposed to be) tail calls return nullptr; - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; + return STI.isTargetDarwin() ? CSR_iOS_ThisReturn_RegMask + : CSR_AAPCS_ThisReturn_RegMask; } BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); @@ -182,14 +180,14 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind const TargetRegisterClass * ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { if (RC == &ARM::CCRRegClass) - return nullptr; // Can't copy CCR registers. + return &ARM::rGPRRegClass; // Can't copy CCR registers. return RC; } unsigned ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); switch (RC->getID()) { default: @@ -311,7 +309,7 @@ ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); // When outgoing call frames are so large that we adjust the stack pointer // around the call, we can no longer use the stack pointer to reach the @@ -356,7 +354,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { return false; // We may also need a base pointer if there are dynamic allocas or stack // pointer adjustments around calls. - if (MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF)) + if (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->hasReservedCallFrame(MF)) return true; // A base pointer is required and allowed. Check that it isn't too late to // reserve it. 
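The getCrossCopyRegClass change in the hunk above (CCR now cross-copies through rGPR instead of being uncopyable) appears paired with the new copyFromCPSR/copyToCPSR helpers earlier in ARMBaseInstrInfo.cpp: CPSR traffic is funneled through a GPR via MRS/MSR. The opcode choice, condensed from that hunk (no new logic here):

    unsigned Opc = Subtarget.isThumb()
                       ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
                       : ARM::MRS;
    // The single A/R-class MRS always reads APSR; M-class cores take an
    // operand selecting the special register, where 0x800 selects APSR.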
@@ -367,7 +368,10 @@ bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned StackAlign = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, @@ -387,7 +391,7 @@ cannotEliminateFrame(const MachineFunction &MF) const { unsigned ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); if (TFI->hasFP(MF)) return FramePtr; @@ -404,7 +408,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); @@ -417,11 +421,6 @@ emitLoadConstPool(MachineBasicBlock &MBB, .setMIFlags(MIFlags); } -bool ARMBaseRegisterInfo::mayOverrideLocalAssignment() const { - // The native linux build hits a downstream codegen bug when this is enabled. - return STI.isTargetDarwin(); -} - bool ARMBaseRegisterInfo:: requiresRegisterScavenging(const MachineFunction &MF) const { return true; @@ -531,7 +530,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { // Note that the incoming offset is based on the SP value at function entry, // so it'll be negative. MachineFunction &MF = *MI->getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); @@ -584,7 +583,7 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, int64_t Offset) const { ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>(); unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : - (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri); + (AFI->isThumb1OnlyFunction() ? 
ARM::tADDframe : ARM::t2ADDri); MachineBasicBlock::iterator Ins = MBB->begin(); DebugLoc DL; // Defaults to "unknown" @@ -593,15 +592,15 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, const MachineFunction &MF = *MBB->getParent(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const MCInstrDesc &MCID = TII.get(ADDriOpc); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); - MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) - .addFrameIndex(FrameIdx).addImm(Offset)); + MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg) + .addFrameIndex(FrameIdx).addImm(Offset); if (!AFI->isThumb1OnlyFunction()) - AddDefaultCC(MIB); + AddDefaultCC(AddDefaultPred(MIB)); } void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, @@ -609,7 +608,7 @@ void ARMBaseRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -708,9 +707,9 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); - const ARMFrameLowering *TFI = - static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); + const ARMFrameLowering *TFI = static_cast<const ARMFrameLowering *>( + MF.getSubtarget().getFrameLowering()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); assert(!AFI->isThumb1OnlyFunction() && "This eliminateFrameIndex does not support Thumb1!"); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 833d3f218480..e9bc412e99e2 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMBASEREGISTERINFO_H -#define ARMBASEREGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_ARMBASEREGISTERINFO_H #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -174,8 +174,6 @@ public: unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods... 
- bool mayOverrideLocalAssignment() const override; - bool requiresRegisterScavenging(const MachineFunction &MF) const override; bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index dc41c1c14bbb..e0d0559ba986 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMCALLINGCONV_H -#define ARMCALLINGCONV_H +#ifndef LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H +#define LLVM_LIB_TARGET_ARM_ARMCALLINGCONV_H #include "ARM.h" #include "ARMBaseInstrInfo.h" @@ -177,8 +177,9 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs(); + // AAPCS HFAs must have 1-4 elements, all of the same type - assert(PendingHAMembers.size() < 8); + assert(PendingHAMembers.size() < 4); if (PendingHAMembers.size() > 0) assert(PendingHAMembers[0].getLocVT() == LocVT); @@ -188,26 +189,21 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); if (ArgFlags.isInConsecutiveRegsLast()) { - assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 && + assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 && "Homogeneous aggregates must have between 1 and 4 members"); // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. - const uint16_t *RegList; - unsigned NumRegs; + ArrayRef<uint16_t> RegList; switch (LocVT.SimpleTy) { - case MVT::i32: case MVT::f32: RegList = SRegList; - NumRegs = 16; break; case MVT::f64: RegList = DRegList; - NumRegs = 8; break; case MVT::v2f64: RegList = QRegList; - NumRegs = 4; break; default: llvm_unreachable("Unexpected member type for HA"); @@ -215,7 +211,7 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, } unsigned RegResult = - State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size()); + State.AllocateRegBlock(RegList, PendingHAMembers.size()); if (RegResult) { for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin(); @@ -235,20 +231,11 @@ static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State.AllocateReg(SRegList[regNo]); unsigned Size = LocVT.getSizeInBits() / 8; - unsigned Align = Size; - - if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) { - // Vectors are always aligned to 8 bytes. If we've seen an i32 here - // it's because it's been split from a larger type, also with align 8. - Align = 8; - } + unsigned Align = std::min(Size, 8U); for (auto It : PendingHAMembers) { It.convertToMem(State.AllocateStack(Size, Align)); State.addLoc(It); - - // Only the first member needs to be aligned. - Align = 1; } // All pending members have now been allocated diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp deleted file mode 100644 index 5fb6ebfeaae1..000000000000 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ /dev/null @@ -1,1909 +0,0 @@ -//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains the pass that transforms the ARM machine instructions into -// relocatable machine code. -// -//===----------------------------------------------------------------------===// - -#include "ARM.h" -#include "ARMBaseInstrInfo.h" -#include "ARMConstantPoolValue.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMRelocations.h" -#include "ARMSubtarget.h" -#include "ARMTargetMachine.h" -#include "MCTargetDesc/ARMAddressingModes.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/PassManager.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#ifndef NDEBUG -#include <iomanip> -#endif -using namespace llvm; - -#define DEBUG_TYPE "jit" - -STATISTIC(NumEmitted, "Number of machine instructions emitted"); - -namespace { - - class ARMCodeEmitter : public MachineFunctionPass { - ARMJITInfo *JTI; - const ARMBaseInstrInfo *II; - const DataLayout *TD; - const ARMSubtarget *Subtarget; - TargetMachine &TM; - JITCodeEmitter &MCE; - MachineModuleInfo *MMI; - const std::vector<MachineConstantPoolEntry> *MCPEs; - const std::vector<MachineJumpTableEntry> *MJTEs; - bool IsPIC; - bool IsThumb; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<MachineModuleInfo>(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - static char ID; - public: - ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) - : MachineFunctionPass(ID), JTI(nullptr), - II((const ARMBaseInstrInfo *)tm.getInstrInfo()), - TD(tm.getDataLayout()), TM(tm), - MCE(mce), MCPEs(nullptr), MJTEs(nullptr), - IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} - - /// getBinaryCodeForInstr - This function, generated by the - /// CodeEmitterGenerator using TableGen, produces the binary encoding for - /// machine instructions. 
- uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const; - - bool runOnMachineFunction(MachineFunction &MF) override; - - const char *getPassName() const override { - return "ARM Machine Code Emitter"; - } - - void emitInstruction(const MachineInstr &MI); - - private: - - void emitWordLE(unsigned Binary); - void emitDWordLE(uint64_t Binary); - void emitConstPoolInstruction(const MachineInstr &MI); - void emitMOVi32immInstruction(const MachineInstr &MI); - void emitMOVi2piecesInstruction(const MachineInstr &MI); - void emitLEApcrelJTInstruction(const MachineInstr &MI); - void emitPseudoMoveInstruction(const MachineInstr &MI); - void addPCLabel(unsigned LabelID); - void emitPseudoInstruction(const MachineInstr &MI); - unsigned getMachineSoRegOpValue(const MachineInstr &MI, - const MCInstrDesc &MCID, - const MachineOperand &MO, - unsigned OpIdx); - - unsigned getMachineSoImmOpValue(unsigned SoImm); - unsigned getAddrModeSBit(const MachineInstr &MI, - const MCInstrDesc &MCID) const; - - void emitDataProcessingInstruction(const MachineInstr &MI, - unsigned ImplicitRd = 0, - unsigned ImplicitRn = 0); - - void emitLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRd = 0, - unsigned ImplicitRn = 0); - - void emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn = 0); - - void emitLoadStoreMultipleInstruction(const MachineInstr &MI); - - void emitMulFrmInstruction(const MachineInstr &MI); - - void emitExtendInstruction(const MachineInstr &MI); - - void emitMiscArithInstruction(const MachineInstr &MI); - - void emitSaturateInstruction(const MachineInstr &MI); - - void emitBranchInstruction(const MachineInstr &MI); - - void emitInlineJumpTable(unsigned JTIndex); - - void emitMiscBranchInstruction(const MachineInstr &MI); - - void emitVFPArithInstruction(const MachineInstr &MI); - - void emitVFPConversionInstruction(const MachineInstr &MI); - - void emitVFPLoadStoreInstruction(const MachineInstr &MI); - - void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI); - - void emitNEONLaneInstruction(const MachineInstr &MI); - void emitNEONDupInstruction(const MachineInstr &MI); - void emitNEON1RegModImmInstruction(const MachineInstr &MI); - void emitNEON2RegInstruction(const MachineInstr &MI); - void emitNEON3RegInstruction(const MachineInstr &MI); - - /// getMachineOpValue - Return binary encoding of operand. If the machine - /// operand requires relocation, record the relocation and return zero. - unsigned getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const; - unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const { - return getMachineOpValue(MI, MI.getOperand(OpIdx)); - } - - // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the the - // TableGen'erated getBinaryCodeForInstr() function to encode any - // operand values, instead querying getMachineOpValue() directly for - // each operand it needs to encode. Thus, any of the new encoder - // helper functions can simply return 0 as the values the return - // are already handled elsewhere. They are placeholders to allow this - // encoder to continue to function until the MC encoder is sufficiently - // far along that this one can be eliminated entirely. 
- unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val) - const { return 0; } - unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,unsigned Val) - const { return 0; } - unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) - const { return 0; } - unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val) - const { return 0; } - unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) - const { return 0; } - unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI, - unsigned Op) const { return 0; } - unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getARMBLTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getARMBLXTargetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getSORegRegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getSORegImmOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2Imm8s4OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm0_1020s4OpValue(const MachineInstr &MI,unsigned Op) - const { return 0; } - unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getAddrMode6OneLane32AddressOpValue(const MachineInstr &MI, - unsigned Op) - const { return 0; } - unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI, - unsigned Op) const { return 0; } - uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0; } - - unsigned getAddrModeImm12OpValue(const 
MachineInstr &MI, unsigned Op) - const { - // {17-13} = reg - // {12} = (U)nsigned (add == '1', sub == '0') - // {11-0} = imm12 - const MachineOperand &MO = MI.getOperand(Op); - const MachineOperand &MO1 = MI.getOperand(Op + 1); - if (!MO.isReg()) { - emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); - return 0; - } - unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); - int32_t Imm12 = MO1.getImm(); - uint32_t Binary; - Binary = Imm12 & 0xfff; - if (Imm12 >= 0) - Binary |= (1 << 12); - Binary |= (Reg << 13); - return Binary; - } - - unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const { - return 0; - } - - uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} - uint32_t getPostIdxRegOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} - uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx) - const { return 0;} - uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const { - // {17-13} = reg - // {12} = (U)nsigned (add == '1', sub == '0') - // {11-0} = imm12 - const MachineOperand &MO = MI.getOperand(Op); - const MachineOperand &MO1 = MI.getOperand(Op + 1); - if (!MO.isReg()) { - emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry); - return 0; - } - unsigned Reg = II->getRegisterInfo().getEncodingValue(MO.getReg()); - int32_t Imm12 = MO1.getImm(); - - // Special value for #-0 - if (Imm12 == INT32_MIN) - Imm12 = 0; - - // Immediate is always encoded as positive. The 'U' bit controls add vs - // sub. - bool isAdd = true; - if (Imm12 < 0) { - Imm12 = -Imm12; - isAdd = false; - } - - uint32_t Binary = Imm12 & 0xfff; - if (isAdd) - Binary |= (1 << 12); - Binary |= (Reg << 13); - return Binary; - } - unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - - unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op) - const { return 0; } - - unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op) - const { return 0; } - - /// getMovi32Value - Return binary encoding of operand for movw/movt. If the - /// machine operand requires relocation, record the relocation and return - /// zero. - unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, - unsigned Reloc); - - /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. - /// - unsigned getShiftOp(unsigned Imm) const ; - - /// Routines that handle operands which add machine relocations which are - /// fixed up by the relocation stage. 
- void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub, bool Indirect, - intptr_t ACPV = 0) const; - void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; - void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; - void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; - void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc, - intptr_t JTBase = 0) const; - unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) const; - unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) const; - }; -} - -char ARMCodeEmitter::ID = 0; - -/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM -/// code to the specified MCE object. -FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, - JITCodeEmitter &JCE) { - return new ARMCodeEmitter(TM, JCE); -} - -bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { - TargetMachine &Target = const_cast<TargetMachine&>(MF.getTarget()); - - assert((Target.getRelocationModel() != Reloc::Default || - Target.getRelocationModel() != Reloc::Static) && - "JIT relocation model must be set to static or default!"); - - JTI = static_cast<ARMJITInfo*>(Target.getJITInfo()); - II = static_cast<const ARMBaseInstrInfo*>(Target.getInstrInfo()); - TD = Target.getDataLayout(); - - Subtarget = &TM.getSubtarget<ARMSubtarget>(); - MCPEs = &MF.getConstantPool()->getConstants(); - MJTEs = nullptr; - if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); - IsPIC = TM.getRelocationModel() == Reloc::PIC_; - IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction(); - JTI->Initialize(MF, IsPIC); - MMI = &getAnalysis<MachineModuleInfo>(); - MCE.setModuleInfo(MMI); - - do { - DEBUG(errs() << "JITTing function '" - << MF.getName() << "'\n"); - MCE.startFunction(MF); - for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); - MBB != E; ++MBB) { - MCE.StartMachineBasicBlock(MBB); - for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I) - emitInstruction(*I); - } - } while (MCE.finishFunction(MF)); - - return false; -} - -/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value. -/// -unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const { - switch (ARM_AM::getAM2ShiftOpc(Imm)) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::asr: return 2; - case ARM_AM::lsl: return 0; - case ARM_AM::lsr: return 1; - case ARM_AM::ror: - case ARM_AM::rrx: return 3; - } -} - -/// getMovi32Value - Return binary encoding of operand for movw/movt. If the -/// machine operand requires relocation, record the relocation and return zero. 
-unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI, - const MachineOperand &MO, - unsigned Reloc) { - assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw)) - && "Relocation to this function should be for movt or movw"); - - if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - else if (MO.isGlobal()) - emitGlobalAddress(MO.getGlobal(), Reloc, true, false); - else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), Reloc); - else if (MO.isMBB()) - emitMachineBasicBlock(MO.getMBB(), Reloc); - else { -#ifndef NDEBUG - errs() << MO; -#endif - llvm_unreachable("Unsupported operand type for movw/movt"); - } - return 0; -} - -/// getMachineOpValue - Return binary encoding of operand. If the machine -/// operand requires relocation, record the relocation and return zero. -unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const { - if (MO.isReg()) - return II->getRegisterInfo().getEncodingValue(MO.getReg()); - else if (MO.isImm()) - return static_cast<unsigned>(MO.getImm()); - else if (MO.isGlobal()) - emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false); - else if (MO.isSymbol()) - emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch); - else if (MO.isCPI()) { - const MCInstrDesc &MCID = MI.getDesc(); - // For VFP load, the immediate offset is multiplied by 4. - unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm) - ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry; - emitConstPoolAddress(MO.getIndex(), Reloc); - } else if (MO.isJTI()) - emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative); - else if (MO.isMBB()) - emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch); - else - llvm_unreachable("Unable to encode MachineOperand!"); - return 0; -} - -/// emitGlobalAddress - Emit the specified address to the code stream. -/// -void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub, bool Indirect, - intptr_t ACPV) const { - MachineRelocation MR = Indirect - ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - const_cast<GlobalValue *>(GV), - ACPV, MayNeedFarStub) - : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - const_cast<GlobalValue *>(GV), ACPV, - MayNeedFarStub); - MCE.addRelocation(MR); -} - -/// emitExternalSymbolAddress - Arrange for the address of an external symbol to -/// be emitted to the current location in the function, and allow it to be PC -/// relative. -void ARMCodeEmitter:: -emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { - MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), - Reloc, ES)); -} - -/// emitConstPoolAddress - Arrange for the address of an constant pool -/// to be emitted to the current location in the function, and allow it to be PC -/// relative. -void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const { - // Tell JIT emitter we'll resolve the address. - MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(), - Reloc, CPI, 0, true)); -} - -/// emitJumpTableAddress - Arrange for the address of a jump table to -/// be emitted to the current location in the function, and allow it to be PC -/// relative. 
-void ARMCodeEmitter:: -emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { - MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(), - Reloc, JTIndex, 0, true)); -} - -/// emitMachineBasicBlock - Emit the specified address basic block. -void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, - unsigned Reloc, - intptr_t JTBase) const { - MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), - Reloc, BB, JTBase)); -} - -void ARMCodeEmitter::emitWordLE(unsigned Binary) { - DEBUG(errs() << " 0x"; - errs().write_hex(Binary) << "\n"); - MCE.emitWordLE(Binary); -} - -void ARMCodeEmitter::emitDWordLE(uint64_t Binary) { - DEBUG(errs() << " 0x"; - errs().write_hex(Binary) << "\n"); - MCE.emitDWordLE(Binary); -} - -void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { - DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); - - MCE.processDebugLoc(MI.getDebugLoc(), true); - - ++NumEmitted; // Keep track of the # of mi's emitted - switch (MI.getDesc().TSFlags & ARMII::FormMask) { - default: { - llvm_unreachable("Unhandled instruction encoding format!"); - } - case ARMII::MiscFrm: - if (MI.getOpcode() == ARM::LEApcrelJT) { - // Materialize jumptable address. - emitLEApcrelJTInstruction(MI); - break; - } - llvm_unreachable("Unhandled instruction encoding!"); - case ARMII::Pseudo: - emitPseudoInstruction(MI); - break; - case ARMII::DPFrm: - case ARMII::DPSoRegFrm: - emitDataProcessingInstruction(MI); - break; - case ARMII::LdFrm: - case ARMII::StFrm: - emitLoadStoreInstruction(MI); - break; - case ARMII::LdMiscFrm: - case ARMII::StMiscFrm: - emitMiscLoadStoreInstruction(MI); - break; - case ARMII::LdStMulFrm: - emitLoadStoreMultipleInstruction(MI); - break; - case ARMII::MulFrm: - emitMulFrmInstruction(MI); - break; - case ARMII::ExtFrm: - emitExtendInstruction(MI); - break; - case ARMII::ArithMiscFrm: - emitMiscArithInstruction(MI); - break; - case ARMII::SatFrm: - emitSaturateInstruction(MI); - break; - case ARMII::BrFrm: - emitBranchInstruction(MI); - break; - case ARMII::BrMiscFrm: - emitMiscBranchInstruction(MI); - break; - // VFP instructions. - case ARMII::VFPUnaryFrm: - case ARMII::VFPBinaryFrm: - emitVFPArithInstruction(MI); - break; - case ARMII::VFPConv1Frm: - case ARMII::VFPConv2Frm: - case ARMII::VFPConv3Frm: - case ARMII::VFPConv4Frm: - case ARMII::VFPConv5Frm: - emitVFPConversionInstruction(MI); - break; - case ARMII::VFPLdStFrm: - emitVFPLoadStoreInstruction(MI); - break; - case ARMII::VFPLdStMulFrm: - emitVFPLoadStoreMultipleInstruction(MI); - break; - - // NEON instructions. - case ARMII::NGetLnFrm: - case ARMII::NSetLnFrm: - emitNEONLaneInstruction(MI); - break; - case ARMII::NDupFrm: - emitNEONDupInstruction(MI); - break; - case ARMII::N1RegModImmFrm: - emitNEON1RegModImmInstruction(MI); - break; - case ARMII::N2RegFrm: - emitNEON2RegInstruction(MI); - break; - case ARMII::N3RegFrm: - emitNEON3RegInstruction(MI); - break; - } - MCE.processDebugLoc(MI.getDebugLoc(), false); -} - -void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) { - unsigned CPI = MI.getOperand(0).getImm(); // CP instruction index. - unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index. - const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex]; - - // Remember the CONSTPOOL_ENTRY address for later relocation. - JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue()); - - // Emit constpool island entry. 
In most cases, the actual values will be - // resolved and relocated after code emission. - if (MCPE.isMachineConstantPoolEntry()) { - ARMConstantPoolValue *ACPV = - static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); - - DEBUG(errs() << " ** ARM constant pool #" << CPI << " @ " - << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n'); - - assert(ACPV->isGlobalValue() && "unsupported constant pool value"); - const GlobalValue *GV = cast<ARMConstantPoolConstant>(ACPV)->getGV(); - if (GV) { - Reloc::Model RelocM = TM.getRelocationModel(); - emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry, - isa<Function>(GV), - Subtarget->GVIsIndirectSymbol(GV, RelocM), - (intptr_t)ACPV); - } else { - const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(); - emitExternalSymbolAddress(Sym, ARM::reloc_arm_absolute); - } - emitWordLE(0); - } else { - const Constant *CV = MCPE.Val.ConstVal; - - DEBUG({ - errs() << " ** Constant pool #" << CPI << " @ " - << (void*)MCE.getCurrentPCValue() << " "; - if (const Function *F = dyn_cast<Function>(CV)) - errs() << F->getName(); - else - errs() << *CV; - errs() << '\n'; - }); - - if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { - emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false); - emitWordLE(0); - } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { - uint32_t Val = uint32_t(*CI->getValue().getRawData()); - emitWordLE(Val); - } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { - if (CFP->getType()->isFloatTy()) - emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - else if (CFP->getType()->isDoubleTy()) - emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); - else { - llvm_unreachable("Unable to handle this constantpool entry!"); - } - } else { - llvm_unreachable("Unable to handle this constantpool entry!"); - } - } -} - -void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) { - const MachineOperand &MO0 = MI.getOperand(0); - const MachineOperand &MO1 = MI.getOperand(1); - - // Emit the 'movw' instruction. - unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000 - - unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF; - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode imm16 as imm4:imm12 - Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12 - Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4 - emitWordLE(Binary); - - unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16; - // Emit the 'movt' instruction. - Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100 - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode imm16 as imm4:imm12, same as movw above. - Binary |= Hi16 & 0xFFF; - Binary |= ((Hi16 >> 12) & 0xF) << 16; - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) { - const MachineOperand &MO0 = MI.getOperand(0); - const MachineOperand &MO1 = MI.getOperand(1); - assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) && - "Not a valid so_imm value!"); - unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm()); - unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm()); - - // Emit the 'mov' instruction.
- unsigned Binary = 0xd << 21; // mov: Insts{24-21} = 0b1101 - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode so_imm. - // Set bit I(25) to identify this is the immediate form of <shifter_op> - Binary |= 1 << ARMII::I_BitShift; - Binary |= getMachineSoImmOpValue(V1); - emitWordLE(Binary); - - // Now the 'orr' instruction. - Binary = 0xc << 21; // orr: Insts{24-21} = 0b1100 - - // Set the conditional execution predicate. - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift; - - // Encode Rn. - Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift; - - // Encode so_imm. - // Set bit I(25) to identify this is the immediate form of <shifter_op> - Binary |= 1 << ARMII::I_BitShift; - Binary |= getMachineSoImmOpValue(V2); - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) { - // It's basically add r, pc, (LJTI - $+8) - - const MCInstrDesc &MCID = MI.getDesc(); - - // Emit the 'add' instruction. - unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100 - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, MCID); - - // Encode Rd. - Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; - - // Encode Rn which is PC. - Binary |= II->getRegisterInfo().getEncodingValue(ARM::PC) << ARMII::RegRnShift; - - // Encode the displacement. - Binary |= 1 << ARMII::I_BitShift; - emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) { - unsigned Opcode = MI.getDesc().Opcode; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag) - Binary |= 1 << ARMII::S_BitShift; - - // Encode register def if there is one. - Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; - - // Encode the shift operation. - switch (Opcode) { - default: break; - case ARM::RRX: - // rrx - Binary |= 0x6 << 4; - break; - case ARM::MOVsrl_flag: - // lsr #1 - Binary |= (0x2 << 4) | (1 << 7); - break; - case ARM::MOVsra_flag: - // asr #1 - Binary |= (0x4 << 4) | (1 << 7); - break; - } - - // Encode register Rm. - Binary |= getMachineOpValue(MI, 1); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::addPCLabel(unsigned LabelID) { - DEBUG(errs() << " ** LPC" << LabelID << " @ " - << (void*)MCE.getCurrentPCValue() << '\n'); - JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue()); -} - -void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { - unsigned Opcode = MI.getDesc().Opcode; - switch (Opcode) { - default: - llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction"); - case ARM::BX_CALL: - case ARM::BMOVPCRX_CALL: { - // First emit mov lr, pc - unsigned Binary = 0x01a0e00f; - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - emitWordLE(Binary); - - // and then emit the branch. 
- emitMiscBranchInstruction(MI); - break; - } - case TargetOpcode::INLINEASM: { - // We allow inline assembler nodes with empty bodies - they can - // implicitly define registers, which is ok for JIT. - if (MI.getOperand(0).getSymbolName()[0]) { - report_fatal_error("JIT does not support inline asm!"); - } - break; - } - case TargetOpcode::CFI_INSTRUCTION: - break; - case TargetOpcode::EH_LABEL: - MCE.emitLabel(MI.getOperand(0).getMCSymbol()); - break; - case TargetOpcode::IMPLICIT_DEF: - case TargetOpcode::KILL: - // Do nothing. - break; - case ARM::CONSTPOOL_ENTRY: - emitConstPoolInstruction(MI); - break; - case ARM::PICADD: { - // Remember the address of the PC label for relocation later. - addPCLabel(MI.getOperand(2).getImm()); - // PICADD is just an add instruction that implicitly reads pc. - emitDataProcessingInstruction(MI, 0, ARM::PC); - break; - } - case ARM::PICLDR: - case ARM::PICLDRB: - case ARM::PICSTR: - case ARM::PICSTRB: { - // Remember the address of the PC label for relocation later. - addPCLabel(MI.getOperand(2).getImm()); - // These are just load / store instructions that implicitly read pc. - emitLoadStoreInstruction(MI, 0, ARM::PC); - break; - } - case ARM::PICLDRH: - case ARM::PICLDRSH: - case ARM::PICLDRSB: - case ARM::PICSTRH: { - // Remember the address of the PC label for relocation later. - addPCLabel(MI.getOperand(2).getImm()); - // These are just load / store instructions that implicitly read pc. - emitMiscLoadStoreInstruction(MI, ARM::PC); - break; - } - - case ARM::MOVi32imm: - // Two instructions to materialize a constant. - if (Subtarget->hasV6T2Ops()) - emitMOVi32immInstruction(MI); - else - emitMOVi2piecesInstruction(MI); - break; - - case ARM::LEApcrelJT: - // Materialize jumptable address. - emitLEApcrelJTInstruction(MI); - break; - case ARM::RRX: - case ARM::MOVsrl_flag: - case ARM::MOVsra_flag: - emitPseudoMoveInstruction(MI); - break; - } -} - -unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, - const MCInstrDesc &MCID, - const MachineOperand &MO, - unsigned OpIdx) { - unsigned Binary = getMachineOpValue(MI, MO); - - const MachineOperand &MO1 = MI.getOperand(OpIdx + 1); - const MachineOperand &MO2 = MI.getOperand(OpIdx + 2); - ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm()); - - // Encode the shift opcode. - unsigned SBits = 0; - unsigned Rs = MO1.getReg(); - if (Rs) { - // Set shift operand (bit[7:4]). - // LSL - 0001 - // LSR - 0011 - // ASR - 0101 - // ROR - 0111 - // RRX - 0110 and bit[11:8] clear. - switch (SOpc) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::lsl: SBits = 0x1; break; - case ARM_AM::lsr: SBits = 0x3; break; - case ARM_AM::asr: SBits = 0x5; break; - case ARM_AM::ror: SBits = 0x7; break; - case ARM_AM::rrx: SBits = 0x6; break; - } - } else { - // Set shift operand (bit[6:4]). - // LSL - 000 - // LSR - 010 - // ASR - 100 - // ROR - 110 - switch (SOpc) { - default: llvm_unreachable("Unknown shift opc!"); - case ARM_AM::lsl: SBits = 0x0; break; - case ARM_AM::lsr: SBits = 0x2; break; - case ARM_AM::asr: SBits = 0x4; break; - case ARM_AM::ror: SBits = 0x6; break; - } - } - Binary |= SBits << 4; - if (SOpc == ARM_AM::rrx) - return Binary; - - // Encode the shift operation Rs or shift_imm (except rrx). - if (Rs) { - // Encode Rs bit[11:8]. - assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0); - return Binary | (II->getRegisterInfo().getEncodingValue(Rs) << ARMII::RegRsShift); - } - - // Encode shift_imm bit[11:7].
- return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7; -} - -unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { - int SoImmVal = ARM_AM::getSOImmVal(SoImm); - assert(SoImmVal != -1 && "Not a valid so_imm value!"); - - // Encode rotate_imm. - unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1) - << ARMII::SoRotImmShift; - - // Encode immed_8. - Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal); - return Binary; -} - -unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, - const MCInstrDesc &MCID) const { - for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e;--i){ - const MachineOperand &MO = MI.getOperand(i-1); - if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) - return 1 << ARMII::S_BitShift; - } - return 0; -} - -void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, - unsigned ImplicitRd, - unsigned ImplicitRn) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, MCID); - - // Encode register def if there is one. - unsigned NumDefs = MCID.getNumDefs(); - unsigned OpIdx = 0; - if (NumDefs) - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - else if (ImplicitRd) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); - - if (MCID.Opcode == ARM::MOVi16) { - // Get immediate from MI. - unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx), - ARM::reloc_arm_movw); - // Encode imm which is the same as in emitMOVi32immInstruction(). - Binary |= Lo16 & 0xFFF; - Binary |= ((Lo16 >> 12) & 0xF) << 16; - emitWordLE(Binary); - return; - } else if(MCID.Opcode == ARM::MOVTi16) { - unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx), - ARM::reloc_arm_movt) >> 16); - Binary |= Hi16 & 0xFFF; - Binary |= ((Hi16 >> 12) & 0xF) << 16; - emitWordLE(Binary); - return; - } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) { - uint32_t v = ~MI.getOperand(2).getImm(); - int32_t lsb = countTrailingZeros(v); - int32_t msb = (32 - countLeadingZeros(v)) - 1; - // Instr{20-16} = msb, Instr{11-7} = lsb - Binary |= (msb & 0x1F) << 16; - Binary |= (lsb & 0x1F) << 7; - emitWordLE(Binary); - return; - } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) { - // Encode Rn in Instr{0-3} - Binary |= getMachineOpValue(MI, OpIdx++); - - uint32_t lsb = MI.getOperand(OpIdx++).getImm(); - uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1; - - // Instr{20-16} = widthm1, Instr{11-7} = lsb - Binary |= (widthm1 & 0x1F) << 16; - Binary |= (lsb & 0x1F) << 7; - emitWordLE(Binary); - return; - } - - // If this is a two-address operand, skip it. e.g. MOVCCr operand 1. - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - // Encode first non-shifter register operand if there is one. - bool isUnary = MCID.TSFlags & ARMII::UnaryDP; - if (!isUnary) { - if (ImplicitRn) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); - else { - Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift; - ++OpIdx; - } - } - - // Encode shifter operand. 
- const MachineOperand &MO = MI.getOperand(OpIdx); - if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) { - // Encode SoReg. - emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx)); - return; - } - - if (MO.isReg()) { - // Encode register Rm. - emitWordLE(Binary | II->getRegisterInfo().getEncodingValue(MO.getReg())); - return; - } - - // Encode so_imm. - Binary |= getMachineSoImmOpValue((unsigned)MO.getImm()); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRd, - unsigned ImplicitRn) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Form = MCID.TSFlags & ARMII::FormMask; - bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // If this is an LDRi12, STRi12 or LDRcp, nothing more needs be done. - if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp || - MI.getOpcode() == ARM::STRi12) { - emitWordLE(Binary); - return; - } - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Operand 0 of a pre- and post-indexed store is the address base - // writeback. Skip it. - bool Skipped = false; - if (IsPrePost && Form == ARMII::StFrm) { - ++OpIdx; - Skipped = true; - } - - // Set first operand - if (ImplicitRd) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRd) << ARMII::RegRdShift); - else - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - // Set second operand - if (ImplicitRn) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); - else - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // If this is a two-address operand, skip it. e.g. LDR_PRE. - if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - const MachineOperand &MO2 = MI.getOperand(OpIdx); - unsigned AM2Opc = (ImplicitRn == ARM::PC) - ? 0 : MI.getOperand(OpIdx+1).getImm(); - - // Set bit U(23) according to sign of immed value (positive or negative). - Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) << - ARMII::U_BitShift); - if (!MO2.getReg()) { // is immediate - if (ARM_AM::getAM2Offset(AM2Opc)) - // Set the value of offset_12 field - Binary |= ARM_AM::getAM2Offset(AM2Opc); - emitWordLE(Binary); - return; - } - - // Set bit I(25), because this is not in immediate encoding. - Binary |= 1 << ARMII::I_BitShift; - assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg())); - // Set bit[3:0] to the corresponding Rm register - Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); - - // If this instr is in scaled register offset/index instruction, set - // shift_immed(bit[11:7]) and shift(bit[6:5]) fields. - if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) { - Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift; // shift - Binary |= ShImm << ARMII::ShiftShift; // shift_immed - } - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Form = MCID.TSFlags & ARMII::FormMask; - bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. 
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Operand 0 of a pre- and post-indexed store is the address base - // writeback. Skip it. - bool Skipped = false; - if (IsPrePost && Form == ARMII::StMiscFrm) { - ++OpIdx; - Skipped = true; - } - - // Set first operand - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - // Skip LDRD and STRD's second operand. - if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD) - ++OpIdx; - - // Set second operand - if (ImplicitRn) - // Special handling for implicit use (e.g. PC). - Binary |= (II->getRegisterInfo().getEncodingValue(ImplicitRn) << ARMII::RegRnShift); - else - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // If this is a two-address operand, skip it. e.g. LDRH_POST. - if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - const MachineOperand &MO2 = MI.getOperand(OpIdx); - unsigned AM3Opc = (ImplicitRn == ARM::PC) - ? 0 : MI.getOperand(OpIdx+1).getImm(); - - // Set bit U(23) according to sign of immed value (positive or negative) - Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) << - ARMII::U_BitShift); - - // If this instr is in register offset/index encoding, set bit[3:0] - // to the corresponding Rm register. - if (MO2.getReg()) { - Binary |= II->getRegisterInfo().getEncodingValue(MO2.getReg()); - emitWordLE(Binary); - return; - } - - // This instr is in immediate offset/index encoding, set bit 22 to 1. - Binary |= 1 << ARMII::AM3_I_BitShift; - if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) { - // Set operands - Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift; // immedH - Binary |= (ImmOffs & 0xF); // immedL - } - - emitWordLE(Binary); -} - -static unsigned getAddrModeUPBits(unsigned Mode) { - unsigned Binary = 0; - - // Set addressing mode by modifying bits U(23) and P(24) - // IA - Increment after - bit U = 1 and bit P = 0 - // IB - Increment before - bit U = 1 and bit P = 1 - // DA - Decrement after - bit U = 0 and bit P = 0 - // DB - Decrement before - bit U = 0 and bit P = 1 - switch (Mode) { - default: llvm_unreachable("Unknown addressing sub-mode!"); - case ARM_AM::da: break; - case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift; break; - case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift; break; - case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift; break; - } - - return Binary; -} - -void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Skip operand 0 of an instruction with base register update. 
- unsigned OpIdx = 0; - if (IsUpdating) - ++OpIdx; - - // Set base address operand - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // Set addressing mode by modifying bits U(23) and P(24) - ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); - Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); - - // Set bit W(21) - if (IsUpdating) - Binary |= 0x1 << ARMII::W_BitShift; - - // Set registers - for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - break; - unsigned RegNum = II->getRegisterInfo().getEncodingValue(MO.getReg()); - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - RegNum < 16); - Binary |= 0x1 << RegNum; - } - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGen. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode S bit if MI modifies CPSR. - Binary |= getAddrModeSBit(MI, MCID); - - // 32x32->64bit operations have two destination registers. The number - // of register definitions will tell us if that's what we're dealing with. - unsigned OpIdx = 0; - if (MCID.getNumDefs() == 2) - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdLoShift; - - // Encode Rd - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift; - - // Encode Rm - Binary |= getMachineOpValue(MI, OpIdx++); - - // Encode Rs - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift; - - // Many multiply instructions (e.g. MLA) have a third src operand. Encode - // it as Rn (for multiply, that's at the same offset as RdLo). - if (MCID.getNumOperands() > OpIdx && - !MCID.OpInfo[OpIdx].isPredicate() && - !MCID.OpInfo[OpIdx].isOptionalDef()) - Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift; - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGen. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Encode Rd - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - const MachineOperand &MO1 = MI.getOperand(OpIdx++); - const MachineOperand &MO2 = MI.getOperand(OpIdx); - if (MO2.isReg()) { - // Two register operand form. - // Encode Rn. - Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift; - - // Encode Rm. - Binary |= getMachineOpValue(MI, MO2); - ++OpIdx; - } else { - Binary |= getMachineOpValue(MI, MO1); - } - - // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand. - if (MI.getOperand(OpIdx).isImm() && - !MCID.OpInfo[OpIdx].isPredicate() && - !MCID.OpInfo[OpIdx].isOptionalDef()) - Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift; - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGen.
- unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // PKH instructions are finished at this point - if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) { - emitWordLE(Binary); - return; - } - - unsigned OpIdx = 0; - - // Encode Rd - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; - - const MachineOperand &MO = MI.getOperand(OpIdx++); - if (OpIdx == MCID.getNumOperands() || - MCID.OpInfo[OpIdx].isPredicate() || - MCID.OpInfo[OpIdx].isOptionalDef()) { - // Encode Rm and it's done. - Binary |= getMachineOpValue(MI, MO); - emitWordLE(Binary); - return; - } - - // Encode Rn. - Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift; - - // Encode Rm. - Binary |= getMachineOpValue(MI, OpIdx++); - - // Encode shift_imm. - unsigned ShiftAmt = MI.getOperand(OpIdx).getImm(); - if (MCID.Opcode == ARM::PKHTB) { - assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!"); - if (ShiftAmt == 32) - ShiftAmt = 0; - } - assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); - Binary |= ShiftAmt << ARMII::ShiftShift; - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGen. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Encode Rd - Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift; - - // Encode saturate bit position. - unsigned Pos = MI.getOperand(1).getImm(); - if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16) - Pos -= 1; - assert((Pos < 16 || (Pos < 32 && - MCID.Opcode != ARM::SSAT16 && - MCID.Opcode != ARM::USAT16)) && - "saturate bit position out of range"); - Binary |= Pos << 16; - - // Encode Rm - Binary |= getMachineOpValue(MI, 2); - - // Encode shift_imm. - if (MCID.getNumOperands() == 4) { - unsigned ShiftOp = MI.getOperand(3).getImm(); - ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp); - if (Opc == ARM_AM::asr) - Binary |= (1 << 6); - unsigned ShiftAmt = MI.getOperand(3).getImm(); - if (ShiftAmt == 32 && Opc == ARM_AM::asr) - ShiftAmt = 0; - assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!"); - Binary |= ShiftAmt << ARMII::ShiftShift; - } - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - if (MCID.Opcode == ARM::TPsoft) { - llvm_unreachable("ARM::TPsoft FIXME"); // FIXME - } - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Set signed_immed_24 field - Binary |= getMachineOpValue(MI, 0); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) { - // Remember the base address of the inline jump table. - uintptr_t JTBase = MCE.getCurrentPCValue(); - JTI->addJumpTableBaseAddr(JTIndex, JTBase); - DEBUG(errs() << " ** Jump Table #" << JTIndex << " @ " << (void*)JTBase - << '\n'); - - // Now emit the jump table entries. - const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs; - for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { - if (IsPIC) - // DestBB address - JT base. - emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase); - else - // Absolute DestBB address. 
- emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute); - emitWordLE(0); - } -} - -void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Handle jump tables. - if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) { - // First emit a ldr pc, [] instruction. - emitDataProcessingInstruction(MI, ARM::PC); - - // Then emit the inline jump table. - unsigned JTIndex = - (MCID.Opcode == ARM::BR_JTr) - ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex(); - emitInlineJumpTable(JTIndex); - return; - } else if (MCID.Opcode == ARM::BR_JTm) { - // First emit a ldr pc, [] instruction. - emitLoadStoreInstruction(MI, ARM::PC); - - // Then emit the inline jump table. - emitInlineJumpTable(MI.getOperand(3).getIndex()); - return; - } - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR) - // The return register is LR. - Binary |= II->getRegisterInfo().getEncodingValue(ARM::LR); - else - // otherwise, set the return register - Binary |= getMachineOpValue(MI, 0); - - emitWordLE(Binary); -} - -unsigned ARMCodeEmitter::encodeVFPRd(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegD = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegClass.contains(RegD); - RegD = II->getRegisterInfo().getEncodingValue(RegD); - if (!isSPVFP) - Binary |= RegD << ARMII::RegRdShift; - else { - Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift; - Binary |= (RegD & 0x01) << ARMII::D_BitShift; - } - return Binary; -} - -unsigned ARMCodeEmitter::encodeVFPRn(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegN = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegClass.contains(RegN); - RegN = II->getRegisterInfo().getEncodingValue(RegN); - if (!isSPVFP) - Binary |= RegN << ARMII::RegRnShift; - else { - Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift; - Binary |= (RegN & 0x01) << ARMII::N_BitShift; - } - return Binary; -} - -unsigned ARMCodeEmitter::encodeVFPRm(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegM = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - bool isSPVFP = ARM::SPRRegClass.contains(RegM); - RegM = II->getRegisterInfo().getEncodingValue(RegM); - if (!isSPVFP) - Binary |= RegM; - else { - Binary |= ((RegM & 0x1E) >> 1); - Binary |= (RegM & 0x01) << ARMII::M_BitShift; - } - return Binary; -} - -void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - assert((Binary & ARMII::D_BitShift) == 0 && - (Binary & ARMII::N_BitShift) == 0 && - (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!"); - - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, OpIdx++); - - // If this is a two-address operand, skip it, e.g. FMACD. - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - - // Encode Dn / Sn. - if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm) - Binary |= encodeVFPRn(MI, OpIdx++); - - if (OpIdx == MCID.getNumOperands() || - MCID.OpInfo[OpIdx].isPredicate() || - MCID.OpInfo[OpIdx].isOptionalDef()) { - // FCMPEZD etc. 
has only one operand. - emitWordLE(Binary); - return; - } - - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, OpIdx); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Form = MCID.TSFlags & ARMII::FormMask; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - switch (Form) { - default: break; - case ARMII::VFPConv1Frm: - case ARMII::VFPConv2Frm: - case ARMII::VFPConv3Frm: - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, 0); - break; - case ARMII::VFPConv4Frm: - // Encode Dn / Sn. - Binary |= encodeVFPRn(MI, 0); - break; - case ARMII::VFPConv5Frm: - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, 0); - break; - } - - switch (Form) { - default: break; - case ARMII::VFPConv1Frm: - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, 1); - break; - case ARMII::VFPConv2Frm: - case ARMII::VFPConv3Frm: - // Encode Dn / Sn. - Binary |= encodeVFPRn(MI, 1); - break; - case ARMII::VFPConv4Frm: - case ARMII::VFPConv5Frm: - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, 1); - break; - } - - if (Form == ARMII::VFPConv5Frm) - // Encode Dn / Sn. - Binary |= encodeVFPRn(MI, 2); - else if (Form == ARMII::VFPConv3Frm) - // Encode Dm / Sm. - Binary |= encodeVFPRm(MI, 2); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - unsigned OpIdx = 0; - - // Encode Dd / Sd. - Binary |= encodeVFPRd(MI, OpIdx++); - - // Encode address base. - const MachineOperand &Base = MI.getOperand(OpIdx++); - Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift; - - // If there is a non-zero immediate offset, encode it. - if (Base.isReg()) { - const MachineOperand &Offset = MI.getOperand(OpIdx); - if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) { - if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add) - Binary |= 1 << ARMII::U_BitShift; - Binary |= ImmOffs; - emitWordLE(Binary); - return; - } - } - - // If immediate offset is omitted, default to +0. - Binary |= 1 << ARMII::U_BitShift; - - emitWordLE(Binary); -} - -void -ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0; - - // Part of binary is determined by TableGn. - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= II->getPredicate(&MI) << ARMII::CondShift; - - // Skip operand 0 of an instruction with base register update. - unsigned OpIdx = 0; - if (IsUpdating) - ++OpIdx; - - // Set base address operand - Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift; - - // Set addressing mode by modifying bits U(23) and P(24) - ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode()); - Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode)); - - // Set bit W(21) - if (IsUpdating) - Binary |= 0x1 << ARMII::W_BitShift; - - // First register is encoded in Dd. - Binary |= encodeVFPRd(MI, OpIdx+2); - - // Count the number of registers. 
- unsigned NumRegs = 1; - for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.isImplicit()) - break; - ++NumRegs; - } - // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0) - // Otherwise, it will be 0, in the case of 32-bit registers. - if(Binary & 0x100) - Binary |= NumRegs * 2; - else - Binary |= NumRegs; - - emitWordLE(Binary); -} - -unsigned ARMCodeEmitter::encodeNEONRd(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegD = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - RegD = II->getRegisterInfo().getEncodingValue(RegD); - Binary |= (RegD & 0xf) << ARMII::RegRdShift; - Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; - return Binary; -} - -unsigned ARMCodeEmitter::encodeNEONRn(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegN = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - RegN = II->getRegisterInfo().getEncodingValue(RegN); - Binary |= (RegN & 0xf) << ARMII::RegRnShift; - Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; - return Binary; -} - -unsigned ARMCodeEmitter::encodeNEONRm(const MachineInstr &MI, - unsigned OpIdx) const { - unsigned RegM = MI.getOperand(OpIdx).getReg(); - unsigned Binary = 0; - RegM = II->getRegisterInfo().getEncodingValue(RegM); - Binary |= (RegM & 0xf); - Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; - return Binary; -} - -/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON -/// data-processing instruction to the corresponding Thumb encoding. -static unsigned convertNEONDataProcToThumb(unsigned Binary) { - assert((Binary & 0xfe000000) == 0xf2000000 && - "not an ARM NEON data-processing instruction"); - unsigned UBit = (Binary >> 24) & 1; - return 0xef000000 | (UBit << 28) | (Binary & 0xffffff); -} - -void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { - unsigned Binary = getBinaryCodeForInstr(MI); - - unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; - const MCInstrDesc &MCID = MI.getDesc(); - if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { - RegTOpIdx = 0; - RegNOpIdx = 1; - LnOpIdx = 2; - } else { // ARMII::NSetLnFrm - RegTOpIdx = 2; - RegNOpIdx = 0; - LnOpIdx = 3; - } - - // Set the conditional execution predicate - Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; - - unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); - RegT = II->getRegisterInfo().getEncodingValue(RegT); - Binary |= (RegT << ARMII::RegRdShift); - Binary |= encodeNEONRn(MI, RegNOpIdx); - - unsigned LaneShift; - if ((Binary & (1 << 22)) != 0) - LaneShift = 0; // 8-bit elements - else if ((Binary & (1 << 5)) != 0) - LaneShift = 1; // 16-bit elements - else - LaneShift = 2; // 32-bit elements - - unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift; - unsigned Opc1 = Lane >> 2; - unsigned Opc2 = Lane & 3; - assert((Opc1 & 3) == 0 && "out-of-range lane number operand"); - Binary |= (Opc1 << 21); - Binary |= (Opc2 << 5); - - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { - unsigned Binary = getBinaryCodeForInstr(MI); - - // Set the conditional execution predicate - Binary |= (IsThumb ? 
ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; - - unsigned RegT = MI.getOperand(1).getReg(); - RegT = II->getRegisterInfo().getEncodingValue(RegT); - Binary |= (RegT << ARMII::RegRdShift); - Binary |= encodeNEONRn(MI, 0); - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { - unsigned Binary = getBinaryCodeForInstr(MI); - // Destination register is encoded in Dd. - Binary |= encodeNEONRd(MI, 0); - // Immediate fields: Op, Cmode, I, Imm3, Imm4 - unsigned Imm = MI.getOperand(1).getImm(); - unsigned Op = (Imm >> 12) & 1; - unsigned Cmode = (Imm >> 8) & 0xf; - unsigned I = (Imm >> 7) & 1; - unsigned Imm3 = (Imm >> 4) & 0x7; - unsigned Imm4 = Imm & 0xf; - Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; - if (IsThumb) - Binary = convertNEONDataProcToThumb(Binary); - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Binary = getBinaryCodeForInstr(MI); - // Destination register is encoded in Dd; source register in Dm. - unsigned OpIdx = 0; - Binary |= encodeNEONRd(MI, OpIdx++); - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - Binary |= encodeNEONRm(MI, OpIdx); - if (IsThumb) - Binary = convertNEONDataProcToThumb(Binary); - // FIXME: This does not handle VDUPfdf or VDUPfqf. - emitWordLE(Binary); -} - -void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - unsigned Binary = getBinaryCodeForInstr(MI); - // Destination register is encoded in Dd; source registers in Dn and Dm. - unsigned OpIdx = 0; - Binary |= encodeNEONRd(MI, OpIdx++); - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - Binary |= encodeNEONRn(MI, OpIdx++); - if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) - ++OpIdx; - Binary |= encodeNEONRm(MI, OpIdx); - if (IsThumb) - Binary = convertNEONDataProcToThumb(Binary); - // FIXME: This does not handle VMOVDneon or VMOVQ. - emitWordLE(Binary); -} - -#include "ARMGenCodeEmitter.inc" diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index ce264eeef90f..5d295317c556 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -275,6 +275,7 @@ namespace { private: void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs); + bool BBHasFallthrough(MachineBasicBlock *MBB); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); unsigned getCPELogAlign(const MachineInstr *CPEMI); void scanFunctionJumpTables(); @@ -382,7 +383,9 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) { << MCP->getConstants().size() << " CP entries, aligned to " << MCP->getConstantPoolAlignment() << " bytes *****\n"); - TII = (const ARMBaseInstrInfo*)MF->getTarget().getInstrInfo(); + TII = (const ARMBaseInstrInfo *)MF->getTarget() + .getSubtargetImpl() + ->getInstrInfo(); AFI = MF->getInfo<ARMFunctionInfo>(); STI = &MF->getTarget().getSubtarget<ARMSubtarget>(); @@ -529,7 +532,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { // identity mapping of CPI's to CPE's. 
const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants(); - const DataLayout &TD = *MF->getTarget().getDataLayout(); + const DataLayout &TD = *MF->getSubtarget().getDataLayout(); for (unsigned i = 0, e = CPs.size(); i != e; ++i) { unsigned Size = TD.getTypeAllocSize(CPs[i].getType()); assert(Size >= 4 && "Too small constant pool entry"); @@ -554,9 +557,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { InsPoint[a] = CPEMI; // Add a new CPEntry, but no corresponding CPUser yet. - std::vector<CPEntry> CPEs; - CPEs.push_back(CPEntry(CPEMI, i)); - CPEntries.push_back(CPEs); + CPEntries.emplace_back(1, CPEntry(CPEMI, i)); ++NumCPEs; DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = " << Size << ", align = " << Align <<'\n'); @@ -566,7 +567,7 @@ ARMConstantIslands::doInitialPlacement(std::vector<MachineInstr*> &CPEMIs) { /// BBHasFallthrough - Return true if the specified basic block can fallthrough /// into the block immediately after it. -static bool BBHasFallthrough(MachineBasicBlock *MBB) { +bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; // Can't fall off end of function. @@ -574,12 +575,15 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) { return false; MachineBasicBlock *NextBB = std::next(MBBI); - for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), - E = MBB->succ_end(); I != E; ++I) - if (*I == NextBB) - return true; + if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end()) + return false; - return false; + // Try to analyze the end of the block. A potential fallthrough may already + // have an unconditional branch for whatever reason. + MachineBasicBlock *TBB, *FBB; + SmallVector<MachineOperand, 4> Cond; + bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond); + return TooDifficult || FBB == nullptr; } /// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI, @@ -1203,7 +1207,8 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, unsigned Growth; if (isWaterInRange(UserOffset, WaterBB, U, Growth) && (WaterBB->getNumber() < U.HighWaterMark->getNumber() || - NewWaterList.count(WaterBB)) && Growth < BestGrowth) { + NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) && + Growth < BestGrowth) { // This is the least amount of required padding seen so far. BestGrowth = Growth; WaterIter = IP; @@ -1265,7 +1270,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned MaxDisp = getUnconditionalBrDisp(UncondBr); ImmBranches.push_back(ImmBranch(&UserMBB->back(), MaxDisp, false, UncondBr)); - BBInfo[UserMBB->getNumber()].Size += Delta; + computeBlockSize(UserMBB); adjustBBOffsetsAfter(UserMBB); return; } @@ -1309,7 +1314,12 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, // Back past any possible branches (allow for a conditional and a maximally // long unconditional). if (BaseInsertOffset + 8 >= UserBBI.postOffset()) { - BaseInsertOffset = UserBBI.postOffset() - UPad - 8; + // Ensure BaseInsertOffset is larger than the offset of the instruction + // following UserMI so that the loop which searches for the split point + // iterates at least once. 
+ BaseInsertOffset = + std::max(UserBBI.postOffset() - UPad - 8, + UserOffset + TII->GetInstSizeInBytes(UserMI) + 1); DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset)); } unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad + @@ -1352,6 +1362,11 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, if (CC != ARMCC::AL) MI = LastIT; } + + // We really must not split an IT block. + DEBUG(unsigned PredReg; + assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL)); + NewMBB = splitBlockBeforeInstr(MI); } @@ -1937,7 +1952,9 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { DEBUG(dbgs() << "Shrink JT: " << *MI << " addr: " << *AddrMI << " lea: " << *LeaMI); unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; - MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) + MachineBasicBlock::iterator MI_JT = MI; + MachineInstr *NewJTMI = + BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc)) .addReg(IdxReg, getKillRegState(IdxRegKill)) .addJumpTableIndex(JTI, JTOP.getTargetFlags()) .addImm(MI->getOperand(JTOpIdx+1).getImm()); diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index c7a84155bd48..13bef54b3b7d 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H -#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H +#ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H +#define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/Support/Casting.h" diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 51d3dbb5bd8e..7ddf8793e126 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -22,8 +22,8 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/IR/GlobalValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! @@ -867,7 +867,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (RI.hasBasePointer(MF)) { int32_t NumBytes = AFI->getFramePtrSpillOffset(); unsigned FramePtr = RI.getFrameRegister(MF); - assert(MF.getTarget().getFrameLowering()->hasFP(MF) && + assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && "base pointer without frame pointer?"); if (AFI->isThumb2Function()) { @@ -887,6 +887,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned MaxAlign = MFI->getMaxAlignment(); assert (!AFI->isThumb1OnlyFunction()); // Emit bic r6, r6, MaxAlign + assert(MaxAlign <= 256 && "The BIC instruction cannot encode " + "immediates larger than 256 with all lower " + "bits set."); unsigned bicOpc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri; AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), @@ -980,7 +983,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; unsigned PICAddOpc = IsARM - ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICADD : ARM::PICLDR) + ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) : ARM::tPICADD; // We need a new const-pool entry to load from. 
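The last hunk above corrects a swapped conditional: judging by the opcode names, LDRLIT_ga_pcrel_ldr is the variant that also loads through the computed pc-relative address, so it should expand to ARM::PICLDR, while the plain form expands to ARM::PICADD. As a hypothetical standalone helper, the fixed selection reads:

  unsigned pickPICAddOpc(bool IsARM, unsigned Opcode) {
    if (!IsARM)
      return ARM::tPICADD;                    // Thumb always uses the add form
    return Opcode == ARM::LDRLIT_ga_pcrel_ldr
               ? ARM::PICLDR                  // _ldr variant: PIC base + load
               : ARM::PICADD;                 // plain variant: PIC add only
  }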
@@ -1343,8 +1346,9 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); - TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); - TRI = TM.getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); AFI = MF.getInfo<ARMFunctionInfo>(); diff --git a/lib/Target/ARM/ARMFPUName.def b/lib/Target/ARM/ARMFPUName.def index 1fef3b3bc5e2..34ce85d280e6 100644 --- a/lib/Target/ARM/ARMFPUName.def +++ b/lib/Target/ARM/ARMFPUName.def @@ -23,6 +23,7 @@ ARM_FPU_NAME("vfpv3", VFPV3) ARM_FPU_NAME("vfpv3-d16", VFPV3_D16) ARM_FPU_NAME("vfpv4", VFPV4) ARM_FPU_NAME("vfpv4-d16", VFPV4_D16) +ARM_FPU_NAME("fpv5-d16", FPV5_D16) ARM_FPU_NAME("fp-armv8", FP_ARMV8) ARM_FPU_NAME("neon", NEON) ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4) diff --git a/lib/Target/ARM/ARMFPUName.h b/lib/Target/ARM/ARMFPUName.h index 2a64cce4880d..86acffbc8f75 100644 --- a/lib/Target/ARM/ARMFPUName.h +++ b/lib/Target/ARM/ARMFPUName.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMFPUNAME_H -#define ARMFPUNAME_H +#ifndef LLVM_LIB_TARGET_ARM_ARMFPUNAME_H +#define LLVM_LIB_TARGET_ARM_ARMFPUNAME_H namespace llvm { namespace ARM { @@ -23,4 +23,4 @@ enum FPUKind { } // namespace ARM } // namespace llvm -#endif // ARMFPUNAME_H +#endif diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index e2d90cd9306c..29462f7a8eb8 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -92,11 +92,11 @@ class ARMFastISel final : public FastISel { public: explicit ARMFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) - : FastISel(funcInfo, libInfo), - M(const_cast<Module&>(*funcInfo.Fn->getParent())), - TM(funcInfo.MF->getTarget()), - TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()) { + : FastISel(funcInfo, libInfo), + M(const_cast<Module &>(*funcInfo.Fn->getParent())), + TM(funcInfo.MF->getTarget()), + TII(*TM.getSubtargetImpl()->getInstrInfo()), + TLI(*TM.getSubtargetImpl()->getTargetLowering()) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); AFI = funcInfo.MF->getInfo<ARMFunctionInfo>(); isThumb2 = AFI->isThumbFunction(); @@ -105,39 +105,39 @@ class ARMFastISel final : public FastISel { // Code from FastISel.cpp. 
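Most hunks in this patch repeat one mechanical move: code that used to fetch TargetInstrInfo, TargetRegisterInfo, or TargetFrameLowering straight off the TargetMachine now goes through the subtarget. A toy illustration of the ownership shape after the change (illustrative names only, not the real LLVM class layout):

struct InstrInfo {};

struct Subtarget {
  InstrInfo II;
  const InstrInfo *getInstrInfo() const { return &II; }
};

struct MachineFunction {
  Subtarget ST;
  const Subtarget &getSubtarget() const { return ST; }
};

// Old shape: MF.getTarget().getInstrInfo(). The TargetMachine no longer
// exposes this directly, so per-function passes ask the subtarget instead.
const InstrInfo *getII(const MachineFunction &MF) {
  return MF.getSubtarget().getInstrInfo();
}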
private: - unsigned FastEmitInst_r(unsigned MachineInstOpcode, + unsigned fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill); - unsigned FastEmitInst_rr(unsigned MachineInstOpcode, + unsigned fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + unsigned fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill); - unsigned FastEmitInst_ri(unsigned MachineInstOpcode, + unsigned fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); - unsigned FastEmitInst_rri(unsigned MachineInstOpcode, + unsigned fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); - unsigned FastEmitInst_i(unsigned MachineInstOpcode, + unsigned fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm); // Backend specific FastISel code. private: - bool TargetSelectInstruction(const Instruction *I) override; - unsigned TargetMaterializeConstant(const Constant *C) override; - unsigned TargetMaterializeAlloca(const AllocaInst *AI) override; + bool fastSelectInstruction(const Instruction *I) override; + unsigned fastMaterializeConstant(const Constant *C) override; + unsigned fastMaterializeAlloca(const AllocaInst *AI) override; bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) override; - bool FastLowerArguments() override; + bool fastLowerArguments() override; private: #include "ARMGenFastISel.inc" @@ -189,7 +189,9 @@ class ARMFastISel final : public FastISel { unsigned ARMSelectCallOp(bool UseReg); unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT); - const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); } + const TargetLowering *getTargetLowering() { + return TM.getSubtargetImpl()->getTargetLowering(); + } // Call handling routines. 
private: @@ -283,7 +285,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { return MIB; } -unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { unsigned ResultReg = createResultReg(RC); @@ -305,7 +307,7 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { @@ -333,7 +335,7 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, @@ -365,7 +367,7 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { @@ -391,7 +393,7 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, @@ -421,7 +423,7 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } -unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, +unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); @@ -511,7 +513,7 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) { unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) - return false; + return 0; // If we can do this in a single instruction without a constant pool entry // do so now. @@ -534,7 +536,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { (ARM_AM::getSOImmVal(Imm) != -1); if (UseImm) { unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi; - unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass : + &ARM::GPRRegClass; + unsigned ImmReg = createResultReg(RC); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg) .addImm(Imm)); @@ -542,11 +546,16 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { } } + unsigned ResultReg = 0; + if (Subtarget->useMovt(*FuncInfo.MF)) + ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + + if (ResultReg) + return ResultReg; + // Load from constant pool. For now 32-bit only. if (VT != MVT::i32) - return false; - - unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + return 0; // MachineConstantPool wants an explicit alignment. 
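ARMMaterializeInt above now tries cheaper strategies in a fixed order before paying for a constant-pool load. A compact model of roughly that decision ladder (hypothetical helper names, not LLVM API; the encodability test is the same modified-immediate rule ARM_AM::getSOImmVal implements):

#include <cstdint>

enum class MatVia { MovImm, MvnImm, MovwMovt, ConstPool };

static bool encodable(uint32_t V) { // 8 bits, rotated right by an even amount
  for (unsigned R = 0; R < 32; R += 2)
    if ((R ? ((V << R) | (V >> (32 - R))) : V) <= 0xFF)
      return true;
  return false;
}

// Sketch of the order: plain immediate, inverted immediate via MVN,
// MOVW/MOVT when the subtarget has it, and only then the constant pool.
MatVia pickMaterialization(uint32_t Imm, bool UseMovt) {
  if (encodable(Imm))  return MatVia::MovImm;
  if (encodable(~Imm)) return MatVia::MvnImm;
  if (UseMovt)         return MatVia::MovwMovt;
  return MatVia::ConstPool;
}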
unsigned Align = DL.getPrefTypeAlignment(C->getType()); @@ -555,21 +564,20 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { Align = DL.getTypeAllocSize(C->getType()); } unsigned Idx = MCP.getConstantPoolIndex(C, Align); - + ResultReg = createResultReg(TLI.getRegClassFor(VT)); if (isThumb2) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM::t2LDRpci), DestReg) - .addConstantPoolIndex(Idx)); + TII.get(ARM::t2LDRpci), ResultReg) + .addConstantPoolIndex(Idx)); else { // The extra immediate is for addrmode2. - DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); + ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(ARM::LDRcp), DestReg) - .addConstantPoolIndex(Idx) - .addImm(0)); + TII.get(ARM::LDRcp), ResultReg) + .addConstantPoolIndex(Idx) + .addImm(0)); } - - return DestReg; + return ResultReg; } unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { @@ -578,9 +586,8 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { Reloc::Model RelocM = TM.getRelocationModel(); bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM); - const TargetRegisterClass *RC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass + : &ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); // FastISel TLS support on non-MachO is broken, punt to SelectionDAG. @@ -679,7 +686,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { return DestReg; } -unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { +unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) { EVT CEVT = TLI.getValueType(C->getType(), true); // Only handle simple types. @@ -698,7 +705,7 @@ unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { // TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF); -unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { +unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) { // Don't handle dynamic allocas. if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; @@ -885,9 +892,8 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { // put the alloca address into a register, set the base type back to // register and continue. This should almost never happen. if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { - const TargetRegisterClass *RC = isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass + : &ARM::GPRRegClass; unsigned ResultReg = createResultReg(RC); unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -901,7 +907,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) { // Since the offset is too large for the load/store instruction // get the reg+offset into a register. 
if (needsLowering) { - Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, + Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, /*Op0IsKill*/false, Addr.Offset, MVT::i32); Addr.Offset = 0; } @@ -1074,7 +1080,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { unsigned ResultReg; if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1086,9 +1092,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: { - unsigned Res = createResultReg(isThumb2 ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); + unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass + : &ARM::GPRRegClass); unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1276,7 +1281,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1301,7 +1306,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1309,7 +1314,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { dyn_cast<ConstantInt>(BI->getCondition())) { uint64_t Imm = CI->getZExtValue(); MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; - FastEmitBranch(Target, DbgLoc); + fastEmitBranch(Target, DbgLoc); return true; } @@ -1339,7 +1344,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); - FastEmitBranch(FBB, DbgLoc); + fastEmitBranch(FBB, DbgLoc); FuncInfo.MBB->addSuccessor(TBB); return true; } @@ -1492,18 +1497,17 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { // Now set a register based on the comparison. Explicitly set the predicates // here. unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; - const TargetRegisterClass *RC = isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass + : &ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); - unsigned ZeroReg = TargetMaterializeConstant(Zero); + unsigned ZeroReg = fastMaterializeConstant(Zero); // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR. 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg) .addReg(ZeroReg).addImm(1) .addImm(ARMPred).addReg(ARM::CPSR); - UpdateValueMap(I, DestReg); + updateValueMap(I, DestReg); return true; } @@ -1522,7 +1526,7 @@ bool ARMFastISel::SelectFPExt(const Instruction *I) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VCVTDS), Result) .addReg(Op)); - UpdateValueMap(I, Result); + updateValueMap(I, Result); return true; } @@ -1541,7 +1545,7 @@ bool ARMFastISel::SelectFPTrunc(const Instruction *I) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM::VCVTSD), Result) .addReg(Op)); - UpdateValueMap(I, Result); + updateValueMap(I, Result); return true; } @@ -1585,7 +1589,7 @@ bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg).addReg(FP)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1617,7 +1621,7 @@ bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) { unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); if (IntReg == 0) return false; - UpdateValueMap(I, IntReg); + updateValueMap(I, IntReg); return true; } @@ -1693,7 +1697,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { .addImm(ARMCC::EQ) .addReg(ARM::CPSR); } - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1783,7 +1787,7 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1825,7 +1829,7 @@ bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) .addReg(Op1).addReg(Op2)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -1883,7 +1887,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, unsigned &NumBytes, bool isVarArg) { SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context); CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false, isVarArg)); @@ -1941,6 +1945,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, // Process the args. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; + const Value *ArgVal = Args[VA.getValNo()]; unsigned Arg = ArgRegs[VA.getValNo()]; MVT ArgVT = ArgVTs[VA.getValNo()]; @@ -1967,7 +1972,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, break; } case CCValAssign::BCvt: { - unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, + unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, /*TODO: Kill=*/false); assert(BC != 0 && "Failed to emit a bitcast!"); Arg = BC; @@ -2001,6 +2006,11 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, } else { assert(VA.isMemLoc()); // Need to store on the stack. + + // Don't emit stores for undef values. 
+ if (isa<UndefValue>(ArgVal)) + continue; + Address Addr; Addr.BaseType = Address::RegBase; Addr.Base.Reg = ARM::SP; @@ -2026,7 +2036,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, // Now the return value. if (RetVT != MVT::isVoid) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); // Copy all of the result registers out of their specified physreg. @@ -2045,7 +2055,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, UsedRegs.push_back(RVLocs[1].getLocReg()); // Finally update the result. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } else { assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); MVT CopyVT = RVLocs[0].getValVT(); @@ -2063,7 +2073,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, UsedRegs.push_back(RVLocs[0].getLocReg()); // Finally update the result. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } } @@ -2087,7 +2097,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ValLocs; - CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext()); + CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */, F.isVarArg())); @@ -2192,7 +2202,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Can't handle non-double multi-reg retvals. if (RetVT != MVT::isVoid && RetVT != MVT::i32) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false)); if (RVLocs.size() >= 2 && RetVT != MVT::f64) return false; @@ -2303,7 +2313,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 && RetVT != MVT::i16 && RetVT != MVT::i32) { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context); + CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context); CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg)); if (RVLocs.size() >= 2 && RetVT != MVT::f64) return false; @@ -2476,18 +2486,13 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); MFI->setFrameAddressIsTaken(true); - unsigned LdrOpc; - const TargetRegisterClass *RC; - if (isThumb2) { - LdrOpc = ARM::t2LDRi12; - RC = (const TargetRegisterClass*)&ARM::tGPRRegClass; - } else { - LdrOpc = ARM::LDRi12; - RC = (const TargetRegisterClass*)&ARM::GPRRegClass; - } + unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; + const TargetRegisterClass *RC = isThumb2 ? 
&ARM::tGPRRegClass + : &ARM::GPRRegClass; const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); + static_cast<const ARMBaseRegisterInfo *>( + TM.getSubtargetImpl()->getRegisterInfo()); unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); unsigned SrcReg = FramePtr; @@ -2505,7 +2510,7 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { .addReg(SrcReg).addImm(0)); SrcReg = DestReg; } - UpdateValueMap(&I, SrcReg); + updateValueMap(&I, SrcReg); return true; } case Intrinsic::memcpy: @@ -2583,7 +2588,7 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { // Because the high bits are undefined, a truncate doesn't generate // any code. - UpdateValueMap(I, SrcReg); + updateValueMap(I, SrcReg); return true; } @@ -2745,7 +2750,7 @@ bool ARMFastISel::SelectIntExt(const Instruction *I) { MVT DestVT = DestEVT.getSimpleVT(); unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt); if (ResultReg == 0) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } @@ -2800,12 +2805,12 @@ bool ARMFastISel::SelectShift(const Instruction *I, } AddOptionalDefs(MIB); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } // TODO: SoftFP support. -bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { +bool ARMFastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { case Instruction::Load: @@ -2983,7 +2988,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, return DestReg2; } -bool ARMFastISel::FastLowerArguments() { +bool ARMFastISel::fastLowerArguments() { if (!FuncInfo.CanLowerReturn) return false; @@ -3050,7 +3055,7 @@ bool ARMFastISel::FastLowerArguments() { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(DstReg, getKillRegState(true)); - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); } return true; diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h index e191a3c779f1..0c910ab6130f 100644 --- a/lib/Target/ARM/ARMFeatures.h +++ b/lib/Target/ARM/ARMFeatures.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef TARGET_ARM_FEATURES_H -#define TARGET_ARM_FEATURES_H +#ifndef LLVM_LIB_TARGET_ARM_ARMFEATURES_H +#define LLVM_LIB_TARGET_ARM_ARMFEATURES_H #include "MCTargetDesc/ARMMCTargetDesc.h" diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 3624377c9402..45c2c30db738 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -47,7 +47,7 @@ ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); // iOS requires FP not to be clobbered for backtracing purpose. 
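For llvm.frameaddress, the surrounding loop emits one LDR per requested depth, each following the saved frame pointer stored in the current frame. As plain C++ (a conceptual model only; the real layout is ABI-specific):

#include <cstdint>

struct Frame {
  Frame *SavedFP; // saved r7/r11 sits at [fp, #0] in this model
};

// Each iteration mirrors one emitted load: fetch the previous frame
// pointer and keep walking until Depth hits zero.
void *frameAddress(Frame *FP, unsigned Depth) {
  while (Depth--)
    FP = FP->SavedFP;
  return FP;
}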
if (STI.isTargetIOS()) @@ -137,12 +137,27 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, } static int sizeOfSPAdjustment(const MachineInstr *MI) { - assert(MI->getOpcode() == ARM::VSTMDDB_UPD); + int RegSize; + switch (MI->getOpcode()) { + case ARM::VSTMDDB_UPD: + RegSize = 8; + break; + case ARM::STMDB_UPD: + case ARM::t2STMDB_UPD: + RegSize = 4; + break; + case ARM::t2STR_PRE: + case ARM::STR_PRE_IMM: + return 4; + default: + llvm_unreachable("Unknown push or pop like instruction"); + } + int count = 0; // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ // pred) so the list starts at 4. for (int i = MI->getNumOperands() - 1; i >= 4; --i) - count += 8; + count += RegSize; return count; } @@ -154,6 +169,110 @@ static bool WindowsRequiresStackProbe(const MachineFunction &MF, return StackSizeInBytes >= 4096; } +namespace { +struct StackAdjustingInsts { + struct InstInfo { + MachineBasicBlock::iterator I; + unsigned SPAdjust; + bool BeforeFPSet; + }; + + SmallVector<InstInfo, 4> Insts; + + void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust, + bool BeforeFPSet = false) { + InstInfo Info = {I, SPAdjust, BeforeFPSet}; + Insts.push_back(Info); + } + + void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) { + auto Info = std::find_if(Insts.begin(), Insts.end(), + [&](InstInfo &Info) { return Info.I == I; }); + assert(Info != Insts.end() && "invalid sp adjusting instruction"); + Info->SPAdjust += ExtraBytes; + } + + void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB, + DebugLoc dl, const ARMBaseInstrInfo &TII, bool HasFP) { + unsigned CFAOffset = 0; + for (auto &Info : Insts) { + if (HasFP && !Info.BeforeFPSet) + return; + + CFAOffset -= Info.SPAdjust; + unsigned CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); + BuildMI(MBB, std::next(Info.I), dl, + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + } +}; +} + +/// Emit an instruction sequence that will align the address in +/// register Reg by zero-ing out the lower bits. For versions of the +/// architecture that support Neon, this must be done in a single +/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a +/// single instruction. That function only gets called when optimizing +/// spilling of D registers on a core with the Neon instruction set +/// present. 
+static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, + const TargetInstrInfo &TII, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL, const unsigned Reg, + const unsigned Alignment, + const bool MustBeSingleInstruction) { + const ARMSubtarget &AST = MF.getTarget().getSubtarget<ARMSubtarget>(); + const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops(); + const unsigned AlignMask = Alignment - 1; + const unsigned NrBitsToZero = countTrailingZeros(Alignment); + assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported"); + if (!AFI->isThumbFunction()) { + // if the BFC instruction is available, use that to zero the lower + // bits: + // bfc Reg, #0, log2(Alignment) + // otherwise use BIC, if the mask to zero the required number of bits + // can be encoded in the bic immediate field + // bic Reg, Reg, Alignment-1 + // otherwise, emit + // lsr Reg, Reg, log2(Alignment) + // lsl Reg, Reg, log2(Alignment) + if (CanUseBFC) { + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask)); + } else if (AlignMask <= 255) { + AddDefaultCC( + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg) + .addReg(Reg, RegState::Kill) + .addImm(AlignMask))); + } else { + assert(!MustBeSingleInstruction && + "Shouldn't call emitAligningInstructions demanding a single " + "instruction to be emitted for large stack alignment for a target " + "without BFC."); + AddDefaultCC(AddDefaultPred( + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero)))); + AddDefaultCC(AddDefaultPred( + BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg) + .addReg(Reg, RegState::Kill) + .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero)))); + } + } else { + // Since this is only reached for Thumb-2 targets, the BFC instruction + // should always be available. + assert(CanUseBFC); + AddDefaultPred(BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg) + .addReg(Reg, RegState::Kill) + .addImm(~AlignMask)); + } +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -163,20 +282,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MCContext &Context = MMI.getContext(); const TargetMachine &TM = MF.getTarget(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo()); - const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + TM.getSubtargetImpl()->getRegisterInfo()); + const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = TM.getFrameLowering()->getStackAlignment(); + unsigned Align = + TM.getSubtargetImpl()->getFrameLowering()->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc(); unsigned FramePtr = RegInfo->getFrameRegister(MF); - int CFAOffset = 0; // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. @@ -189,15 +308,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; + StackAdjustingInsts DefCFAOffsetCandidates; + // Allocate the vararg register save area. if (ArgRegsSaveSize) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, MachineInstr::FrameSetup); - CFAOffset -= ArgRegsSaveSize; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); } if (!AFI->hasStackFrame() && @@ -205,11 +322,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes - ArgRegsSaveSize != 0) { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), MachineInstr::FrameSetup); - CFAOffset -= NumBytes - ArgRegsSaveSize; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), + NumBytes - ArgRegsSaveSize, true); } return; } @@ -252,21 +366,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { } // Move past area 1. - MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push, - DPRCSPush; - if (GPRCS1Size > 0) + MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; + if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true); + } // Determine starting offsets of spill areas. bool HasFP = hasFP(MF); - unsigned DPRCSOffset = NumBytes - (ArgRegsSaveSize + GPRCS1Size - + GPRCS2Size + DPRCSSize); - unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; - unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size; + unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; + unsigned DPRAlign = DPRCSSize ? std::min(8U, Align) : 4U; + unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign; + unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; int FramePtrOffsetInPush = 0; if (HasFP) { - FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) - + GPRCS1Size + ArgRegsSaveSize; + FramePtrOffsetInPush = + MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); } @@ -275,16 +391,32 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 2. - if (GPRCS2Size > 0) + if (GPRCS2Size > 0) { GPRCS2Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + } + + // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our + // .cfi_offset operations will reflect that. 
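The StackAdjustingInsts bookkeeping used above replaces the old emit-as-you-go CFI updates: each SP adjustment is merely recorded during the prologue, and the .cfi_def_cfa_offset directives come out in one pass at the end, stopping early once the frame pointer takes over the CFA. A runnable model of that pass (labels stand in for machine instructions; the byte counts are made-up examples):

#include <cstdio>
#include <vector>

struct Adjust { const char *Label; unsigned SPAdjust; bool BeforeFPSet; };

// Same shape as StackAdjustingInsts::emitDefCFAOffsets: accumulate the
// running SP displacement; once HasFP and the FP has been set, later
// offsets would be redundant, so stop.
void emitDefCFAOffsets(const std::vector<Adjust> &Insts, bool HasFP) {
  unsigned CFAOffset = 0;
  for (const Adjust &A : Insts) {
    if (HasFP && !A.BeforeFPSet)
      return;
    CFAOffset += A.SPAdjust;
    std::printf("after %-16s .cfi_def_cfa_offset %u\n", A.Label, CFAOffset);
  }
}

int main() {
  emitDefCFAOffsets({{"push {r4-r7,lr}", 20, true},
                     {"vpush {d8-d9}", 16, false},
                     {"sub sp, #24", 24, false}},
                    /*HasFP=*/false);
}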
+ if (DPRGapSize) { + assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs"); + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, DPRGapSize)) + DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize); + else { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize, + MachineInstr::FrameSetup); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize); + } + } // Move past area 3. if (DPRCSSize > 0) { - DPRCSPush = MBBI; // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. - while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) + while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) { + DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(MBBI)); LastPush = MBBI++; + } } // Move past the aligned DPRCS2 area. @@ -343,18 +475,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { NumBytes = 0; } - unsigned adjustedGPRCS1Size = GPRCS1Size; if (NumBytes) { // Adjust SP after all the callee-save spills. - if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) { - if (LastPush == GPRCS1Push) { - FramePtrOffsetInPush += NumBytes; - adjustedGPRCS1Size += NumBytes; - NumBytes = 0; - } - } else + if (tryFoldSPUpdateIntoPushPop(STI, MF, LastPush, NumBytes)) + DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes); + else { emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes); + } if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 @@ -368,13 +497,42 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } - if (adjustedGPRCS1Size > 0) { - CFAOffset -= adjustedGPRCS1Size; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - MachineBasicBlock::iterator Pos = ++GPRCS1Push; - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + // Set FP to point to the stack slot that contains the previous FP. + // For iOS, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not iOS, all the callee-saved registers go + // into spill area 1, including the FP in R11. In either case, it + // is in area one and the adjustment needs to take place just after + // that push. + if (HasFP) { + MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); + unsigned PushSize = sizeOfSPAdjustment(GPRCS1Push); + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, + dl, TII, FramePtr, ARM::SP, + PushSize + FramePtrOffsetInPush, + MachineInstr::FrameSetup); + if (FramePtrOffsetInPush + PushSize != 0) { + unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( + nullptr, MRI->getDwarfRegNum(FramePtr, true), + -(ArgRegsSaveSize - FramePtrOffsetInPush))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } else { + unsigned CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( + nullptr, MRI->getDwarfRegNum(FramePtr, true))); + BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); + } + } + + // Now that the prologue's actual instructions are finalised, we can insert + // the necessary DWARF cf instructions to describe the situation. 
Start by + // recording where each register ended up: + if (GPRCS1Size > 0) { + MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); + int CFIIndex; for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -399,47 +557,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); break; } } } - // Set FP to point to the stack slot that contains the previous FP. - // For iOS, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not iOS, all the callee-saved registers go - // into spill area 1, including the FP in R11. In either case, it - // is in area one and the adjustment needs to take place just after - // that push. - if (HasFP) { - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, GPRCS1Push, dl, TII, - FramePtr, ARM::SP, FramePtrOffsetInPush, - MachineInstr::FrameSetup); - if (FramePtrOffsetInPush) { - CFAOffset += FramePtrOffsetInPush; - unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( - nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); - BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - - } else { - unsigned CFIIndex = - MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( - nullptr, MRI->getDwarfRegNum(FramePtr, true))); - BuildMI(MBB, GPRCS1Push, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } - if (GPRCS2Size > 0) { - MachineBasicBlock::iterator Pos = ++GPRCS2Push; - if (!HasFP) { - CFAOffset -= GPRCS2Size; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } + MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -455,7 +581,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } break; } @@ -465,17 +592,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (DPRCSSize > 0) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. 
- do { - MachineBasicBlock::iterator Push = DPRCSPush++; - if (!HasFP) { - CFAOffset -= sizeOfSPAdjustment(Push); - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD); - + MachineBasicBlock::iterator Pos = std::next(LastPush); for (const auto &Entry : CSI) { unsigned Reg = Entry.getReg(); int FI = Entry.getFrameIdx(); @@ -485,21 +602,18 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Offset = MFI->getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); - BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } } } - if (NumBytes) { - if (!HasFP) { - CFAOffset -= NumBytes; - unsigned CFIIndex = MMI.addFrameInst( - MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); - BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); - } - } + // Now we can emit descriptions of where the canonical frame address was + // throughout the process. If we have a frame pointer, it takes over the job + // half-way through, so only the first few .cfi_def_cfa_offset instructions + // actually get emitted. + DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP); if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -507,6 +621,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedGapSize(DPRGapSize); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); // If we need dynamic stack realignment, do it here. Be paranoid and make @@ -515,28 +630,24 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // realigned. if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) { unsigned MaxAlign = MFI->getMaxAlignment(); - assert (!AFI->isThumb1OnlyFunction()); + assert(!AFI->isThumb1OnlyFunction()); if (!AFI->isThumbFunction()) { - // Emit bic sp, sp, MaxAlign - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::BICri), ARM::SP) - .addReg(ARM::SP, RegState::Kill) - .addImm(MaxAlign-1))); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign, + false); } else { - // We cannot use sp as source/dest register here, thus we're emitting the - // following sequence: + // We cannot use sp as source/dest register here, thus we're using r4 to + // perform the calculations. We're emitting the following sequence: // mov r4, sp - // bic r4, r4, MaxAlign + // -- use emitAligningInstructions to produce best sequence to zero + // -- out lower bits in r4 // mov sp, r4 // FIXME: It will be better just to find spare register here. 
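emitAligningInstructions, called above for the ARM path and just below for Thumb2 via r4, picks between three ways of clearing the low bits of a register. The value-level effect of each choice (a C++ sketch; the real code emits MIR, not arithmetic):

#include <cassert>
#include <cstdint>

// Align Reg down to Alignment (a power of two), the three ways the
// function can lower it: BFC zeroes a bitfield directly, BIC needs the
// mask to be an encodable immediate (<= 255), and otherwise an lsr/lsl
// pair shifts the low bits out and back in.
uint32_t alignDown(uint32_t Reg, uint32_t Alignment, bool HasBFC) {
  assert(Alignment && !(Alignment & (Alignment - 1)));
  const uint32_t AlignMask = Alignment - 1;
  const unsigned NrBitsToZero = __builtin_ctz(Alignment);
  if (HasBFC)
    return Reg & ~AlignMask;                    // bfc Reg, #0, #N
  if (AlignMask <= 255)
    return Reg & ~AlignMask;                    // bic Reg, Reg, #mask
  return (Reg >> NrBitsToZero) << NrBitsToZero; // lsr; lsl
}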
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) - .addReg(ARM::SP, RegState::Kill)); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, - TII.get(ARM::t2BICri), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign-1))); + .addReg(ARM::SP, RegState::Kill)); + emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign, + false); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) - .addReg(ARM::R4, RegState::Kill)); + .addReg(ARM::R4, RegState::Kill)); } AFI->setShouldRestoreSPFromFP(true); @@ -574,7 +685,7 @@ void ARMFrameLowering::fixTCReturn(MachineFunction &MF, unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); const ARMBaseInstrInfo &TII = - *MF.getTarget().getSubtarget<ARMSubtarget>().getInstrInfo(); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); if (!(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri)) return; @@ -622,14 +733,17 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); assert(!AFI->isThumb1OnlyFunction() && "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -659,6 +773,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, NumBytes -= (ArgRegsSaveSize + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedGapSize() + AFI->getDPRCalleeSavedAreaSize()); // Reset SP based on frame pointer only if the stack frame extends beyond @@ -707,6 +822,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) MBBI++; } + if (AFI->getDPRCalleeSavedGapSize()) { + assert(AFI->getDPRCalleeSavedGapSize() == 4 && + "unexpected DPR alignment gap"); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize()); + } + if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } @@ -732,8 +853,8 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg, int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); int FPOffset = Offset - AFI->getFramePtrSpillOffset(); @@ -818,7 +939,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned NumAlignedDPRCS2Regs, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const 
TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -891,7 +1012,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); unsigned RetOpcode = MI->getOpcode(); @@ -981,7 +1102,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineFrameInfo &MFI = *MF.getFrameInfo(); // Mark the D-register spill slots as properly aligned. Since MFI computes @@ -1021,15 +1142,16 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, // The immediate is <= 64, so it doesn't need any special encoding. unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri; AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::SP) - .addImm(8 * NumAlignedDPRCS2Regs))); + .addReg(ARM::SP) + .addImm(8 * NumAlignedDPRCS2Regs))); - // bic r4, r4, #align-1 - Opc = isThumb ? ARM::t2BICri : ARM::BICri; unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment(); - AddDefaultCC(AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4) - .addReg(ARM::R4, RegState::Kill) - .addImm(MaxAlign - 1))); + // We must set parameter MustBeSingleInstruction to true, since + // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform + // stack alignment. Luckily, this can always be done since all ARM + // architecture versions that support Neon also support the BFC + // instruction. + emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true); // mov sp, r4 // The stack pointer must be adjusted before spilling anything, otherwise @@ -1140,7 +1262,7 @@ static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); // Find the frame index assigned to d8. int D8SpillFI = 0; @@ -1355,12 +1477,15 @@ static void checkNumAlignedDPRCS2Regs(MachineFunction &MF) { return; // Don't bother if the default stack alignment is sufficiently high. - if (MF.getTarget().getFrameLowering()->getStackAlignment() >= 8) + if (MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment() >= 8) return; // Aligned spills require stack realignment. 
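The 4-byte DPR gap asserted on in the epilogue above comes straight from the prologue arithmetic: the GPR pushes can leave SP only 4-byte aligned while vpush wants 8. A worked check (assumed numbers; push {r4-r7,lr} is five registers, 20 bytes):

#include <cassert>

int main() {
  unsigned ArgRegsSaveSize = 0;
  unsigned GPRCS1Size = 20; // push {r4-r7,lr}
  unsigned GPRCS2Size = 0;
  unsigned DPRAlign = 8;    // DPRCSSize != 0 and stack alignment >= 8
  unsigned DPRGapSize = (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign;
  assert(DPRGapSize == 4);  // one pad word before the vpush, undone on pop
}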
- const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); if (!RegInfo->canRealignStack(MF)) return; @@ -1399,10 +1524,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, unsigned NumGPRSpills = 0; SmallVector<unsigned, 4> UnspilledCS1GPRs; SmallVector<unsigned, 4> UnspilledCS2GPRs; - const ARMBaseRegisterInfo *RegInfo = - static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); + const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -1565,7 +1690,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // of GPRs, spill one extra callee save GPR so we won't have to pad between // the integer and double callee save areas. unsigned TargetAlign = getStackAlignment(); - if (TargetAlign == 8 && (NumGPRSpills & 1)) { + if (TargetAlign >= 8 && (NumGPRSpills & 1)) { if (CS1Spilled && !UnspilledCS1GPRs.empty()) { for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { unsigned Reg = UnspilledCS1GPRs[i]; @@ -1643,7 +1768,7 @@ void ARMFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount @@ -1761,7 +1886,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MCContext &Context = MMI.getContext(); const MCRegisterInfo *MRI = Context.getRegisterInfo(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc DL; diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 74629c32b398..b7be43642ad0 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM_FRAMEINFO_H -#define ARM_FRAMEINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H +#define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H #include "llvm/Target/TargetFrameLowering.h" diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 0885c4e5721a..0e4f81c8789e 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -46,8 +46,8 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { const MCInstrDesc &LastMCID = LastMI->getDesc(); const TargetMachine &TM = MI->getParent()->getParent()->getTarget(); - const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( + 
TM.getSubtargetImpl()->getInstrInfo()); // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h index a8198e26703e..ccf09db69937 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.h +++ b/lib/Target/ARM/ARMHazardRecognizer.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMHAZARDRECOGNIZER_H -#define ARMHAZARDRECOGNIZER_H +#ifndef LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H +#define LLVM_LIB_TARGET_ARM_ARMHAZARDRECOGNIZER_H #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" @@ -46,4 +46,4 @@ public: } // end namespace llvm -#endif // ARMHAZARDRECOGNIZER_H +#endif diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 38547cfae2e0..9621743fe307 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -425,7 +425,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { return true; if (Use->isMachineOpcode()) { const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( - CurDAG->getTarget().getInstrInfo()); + CurDAG->getSubtarget().getInstrInfo()); const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); if (MCID.mayStore()) @@ -526,8 +526,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -542,16 +541,15 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, } if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - int RHSC = (int)RHS->getZExtValue(); + int RHSC = (int)RHS->getSExtValue(); if (N.getOpcode() == ISD::SUB) RHSC = -RHSC; - if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) + if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -697,8 +695,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -718,8 +715,7 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -896,8 +892,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, 
TLI->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); @@ -911,8 +906,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -957,8 +951,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) { Base = N.getOperand(0); @@ -975,8 +968,7 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1199,8 +1191,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1217,8 +1208,7 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1266,8 +1256,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. 
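The SelectAddrModeImm12 hunk earlier in this file (getZExtValue changed to getSExtValue, bound changed to RHSC > -0x1000) widens the accepted offsets to the full signed range: ARM's 12-bit offset together with the add/sub bit reaches -4095..4095, with -4096 not encodable. A quick check of that predicate (a plain C++ restatement):

#include <cassert>

bool fitsAddrModeImm12(int RHSC) { return RHSC > -0x1000 && RHSC < 0x1000; }

int main() {
  assert(fitsAddrModeImm12(4095) && fitsAddrModeImm12(-4095));
  assert(!fitsAddrModeImm12(4096) && !fitsAddrModeImm12(-4096));
}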
int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1296,8 +1285,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1326,8 +1314,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1392,10 +1379,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, OffReg = OffReg.getOperand(0); else { ShAmt = 0; - ShOpcVal = ARM_AM::no_shift; } - } else { - ShOpcVal = ARM_AM::no_shift; } } @@ -1425,7 +1409,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32); @@ -1800,6 +1784,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, case MVT::v8i16: OpcodeIndex = 1; break; case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; + case MVT::v2f64: case MVT::v2i64: OpcodeIndex = 3; assert(NumVecs == 1 && "v2i64 type only supported for VLD1"); break; @@ -1936,6 +1921,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, case MVT::v8i16: OpcodeIndex = 1; break; case MVT::v4f32: case MVT::v4i32: OpcodeIndex = 2; break; + case MVT::v2f64: case MVT::v2i64: OpcodeIndex = 3; assert(NumVecs == 1 && "v2i64 type only supported for VST1"); break; @@ -2361,6 +2347,25 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); } } + + if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { + unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); + unsigned LSB = 0; + if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && + !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) + return nullptr; + + if (LSB + Width > 32) + return nullptr; + + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width - 1, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); + } + return nullptr; } @@ -2457,10 +2462,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } if (UseCP) { - SDValue CPIdx = - CurDAG->getTargetConstantPool(ConstantInt::get( - Type::getInt32Ty(*CurDAG->getContext()), Val), - getTargetLowering()->getPointerTy()); + SDValue CPIdx = CurDAG->getTargetConstantPool( + ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), + TLI->getPointerTy()); SDNode *ResNode; if 
(Subtarget->isThumb()) { @@ -2490,12 +2494,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. int FI = cast<FrameIndexSDNode>(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, - getTargetLowering()->getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy()); if (Subtarget->isThumb1Only()) { - SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), - getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops); + return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); } else { unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? ARM::t2ADDri : ARM::ADDri); @@ -2509,6 +2511,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) return I; break; + case ISD::SIGN_EXTEND_INREG: case ISD::SRA: if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) return I; @@ -3393,7 +3396,6 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); Ops.push_back(T1.getValue(1)); CurDAG->UpdateNodeOperands(GU, Ops); - GU = T1.getNode(); } else { // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index f5baf2e4859c..f92f257cd7eb 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -70,9 +71,9 @@ namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs, - LLVMContext &C, ParmContext PC) - : CCState(CC, isVarArg, MF, TM, locs, C) { + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C, + ParmContext PC) + : CCState(CC, isVarArg, MF, locs, C) { assert(((PC == Call) || (PC == Prologue)) && "ARMCCState users must specify whether their context is call" "or prologue generation."); @@ -155,19 +156,11 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -static TargetLoweringObjectFile *createTLOF(const Triple &TT) { - if (TT.isOSBinFormatMachO()) - return new TargetLoweringObjectFileMachO(); - if (TT.isOSWindows()) - return new TargetLoweringObjectFileCOFF(); - return new ARMElfTargetObjectFile(); -} - -ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { +ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM) + : TargetLowering(TM) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); - RegInfo = TM.getRegisterInfo(); - Itins = TM.getInstrItineraryData(); + RegInfo = TM.getSubtargetImpl()->getRegisterInfo(); + Itins = TM.getSubtargetImpl()->getInstrItineraryData(); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); @@ -312,6 +305,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // Conversions between floating types. 
// RTABI chapter 4.1.2, Table 7 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, + { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID }, // Integer to floating-point conversions. @@ -387,6 +381,19 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); } + // The half <-> float conversion functions are always soft-float, but are + // needed for some targets which use a hard-float calling convention by + // default. + if (Subtarget->isAAPCS_ABI()) { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS); + } else { + setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS); + setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS); + } + if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, &ARM::tGPRRegClass); else @@ -394,26 +401,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { addRegisterClass(MVT::f32, &ARM::SPRRegClass); - if (!Subtarget->isFPOnlySP()) - addRegisterClass(MVT::f64, &ARM::DPRRegClass); + addRegisterClass(MVT::f64, &ARM::DPRRegClass); } - for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { - for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) - setTruncStoreAction((MVT::SimpleValueType)VT, - (MVT::SimpleValueType)InnerVT, Expand); - setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); - setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + for (MVT InnerVT : MVT::vector_valuetypes()) { + setTruncStoreAction(VT, InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); + } - setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::MULHS, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::MULHU, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); - setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::BSWAP, VT, Expand); } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); @@ -561,15 +565,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::FP_TO_UINT); setTargetDAGCombine(ISD::FDIV); + setTargetDAGCombine(ISD::LOAD); // It is legal to extload from v4i8 to v4i16 or v4i32. 
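The extload hunks above, and the loop that follows, use the new form of setLoadExtAction, which is keyed on the (extension kind, result type, memory type) triple instead of the memory type alone. A reduced sketch, assuming it runs inside a TargetLowering subclass constructor since the setter is protected:

// With the old two-argument API every result type shared one action per
// memory type; the extra parameter lets a target state, per destination
// type, that extending a v4i8 in memory is legal.
setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal); // v4i8 -> v4i16
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal); // v4i8 -> v4i32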
MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16, MVT::v2i32}; for (unsigned i = 0; i < 6; ++i) { - setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal); - setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal); - setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal); + for (MVT VT : MVT::integer_vector_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, Tys[i], Legal); + setLoadExtAction(ISD::ZEXTLOAD, VT, Tys[i], Legal); + setLoadExtAction(ISD::SEXTLOAD, VT, Tys[i], Legal); + } } } @@ -577,12 +584,47 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->isThumb1Only()) setTargetDAGCombine(ISD::ADDC); + if (Subtarget->isFPOnlySP()) { + // When targeting a floating-point unit with only single-precision + // operations, f64 is legal for the few double-precision instructions which + // are present. However, no double-precision operations other than moves, + // loads and stores are provided by the hardware. + setOperationAction(ISD::FADD, MVT::f64, Expand); + setOperationAction(ISD::FSUB, MVT::f64, Expand); + setOperationAction(ISD::FMUL, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FDIV, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FPOWI, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FLOG, MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP, MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom); + } computeRegisterProperties(); // ARM does not have floating-point extending loads. - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand); + for (MVT VT : MVT::fp_valuetypes()) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + } // ... or truncating stores setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -590,7 +632,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f16, Expand); // ARM does not have i1 sign extending load. - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + for (MVT VT : MVT::integer_valuetypes()) + setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); // ARM supports all 4 flavors of integer indexed load / store. if (!Subtarget->isThumb1Only()) { @@ -720,8 +763,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use - // the default expansion.
If we are targeting a single-threaded system, + then set them all for expand so we can lower them later into their + non-atomic form. + if (TM.Options.ThreadModel == ThreadModel::Single) + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + else if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { // ATOMIC_FENCE needs custom lowering; the others should have been expanded // to ldrex/strex loops already. setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); @@ -729,7 +776,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // On v8, we have particularly efficient implementations of atomic fences // if they can be combined with nearby atomic loads and stores. if (!Subtarget->hasV8Ops()) { - // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. + // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc. setInsertFencesForAtomic(true); } } else { @@ -830,8 +877,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } - // v8 adds f64 <-> f16 conversion. Before that it should be expanded. - if (!Subtarget->hasV8Ops()) { + // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded. + if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) { setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand); } @@ -847,7 +894,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (Subtarget->hasSinCos()) { setLibcallName(RTLIB::SINCOS_F32, "sincosf"); setLibcallName(RTLIB::SINCOS_F64, "sincos"); - if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + if (Subtarget->getTargetTriple().isiOS()) { // For iOS, we don't want the normal expansion of a libcall to // sincos. We want to issue a libcall to __sincos_stret. setOperationAction(ISD::FSINCOS, MVT::f64, Custom); @@ -855,6 +902,23 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } } + // FP-ARMv8 implements a lot of rounding-like FP operations. + if (Subtarget->hasFPARMv8()) { + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FROUND, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + if (!Subtarget->isFPOnlySP()) { + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FROUND, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + } + } // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); @@ -1130,7 +1194,8 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { // Loads are scheduled for latency even if the instruction itinerary // is not available. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); if (MCID.getNumDefs() == 0) @@ -1267,8 +1332,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, // Assign locations to each value returned by this call.
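For intuition about the ThreadModel::Single path added above (reached via clang's -mthread-model flag), marking ATOMIC_FENCE as Expand on a single-threaded target lets legalization delete the barrier outright. An assumed user-level example:

#include <atomic>

void publish(std::atomic<int> &flag) {
  // Under the default thread model this fence must survive as a real
  // barrier (dmb ish on ARMv7); with ThreadModel::Single the Expand action
  // above allows it to be lowered away, since no other thread can observe
  // the ordering.
  std::atomic_thread_fence(std::memory_order_release);
  flag.store(1, std::memory_order_relaxed);
}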
SmallVector<CCValAssign, 16> RVLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext(), Call); CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv, /* Return*/ true, isVarArg)); @@ -1428,8 +1493,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); @@ -1657,14 +1722,17 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); bool isStub = (isExt && Subtarget->isTargetMachO()) && getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || isStub; + isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // ARM call to a local ARM function is predicable. isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), - DAG.getTargetGlobalAddress(GV, dl, getPointerTy())); + DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), + 0, ARMII::MO_NONLAZY)); + Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, + MachinePointerInfo::getGOT(), false, false, true, 0); } else if (Subtarget->isTargetCOFF()) { assert(Subtarget->isTargetWindows() && "Windows is the only supported COFF target"); @@ -1690,7 +1758,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isDirect = true; bool isStub = Subtarget->isTargetMachO() && getTargetMachine().getRelocationModel() != Reloc::Static; - isARMFunc = !Subtarget->isThumb() || isStub; + isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass()); // tBX takes a register source operand. const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { @@ -1751,7 +1819,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Add a register mask operand representing the call-preserved registers. if (!isTailCall) { const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); if (isThisReturn) { // For 'this' returns, use the R0-preserving mask if applicable @@ -1951,17 +2020,32 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Subtarget->isThumb1Only()) return false; + // Externally-defined functions with weak linkage should not be + // tail-called on ARM when the OS does not support dynamic + // pre-emption of symbols, as the AAELF spec requires normal calls + // to undefined weak functions to be replaced with a NOP or jump to the + // next instruction. 
The behaviour of branch instructions in this + // situation (as used for tail calls) is implementation-defined, so we + // cannot rely on the linker replacing the tail call with a return. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + const Triple TT(getTargetMachine().getTargetTriple()); + if (GV->hasExternalWeakLinkage() && + (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) + return false; + } + // If the calling conventions do not match, then we'd better make sure the // results are returned in the same way as what the caller expects. if (!CCMatch) { SmallVector<CCValAssign, 16> RVLocs1; - ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs1, *DAG.getContext(), Call); + ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), RVLocs1, + *DAG.getContext(), Call); CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); SmallVector<CCValAssign, 16> RVLocs2; - ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs2, *DAG.getContext(), Call); + ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), RVLocs2, + *DAG.getContext(), Call); CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); if (RVLocs1.size() != RVLocs2.size()) @@ -1995,8 +2079,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Check if stack adjustment is needed. For now, do not do this if any // argument is passed on the stack. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC, false, isVarArg)); if (CCInfo.getNextStackOffset()) { @@ -2006,7 +2090,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // the caller's fixed stack objects. MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e; ++i, ++realArgIdx) { @@ -2049,7 +2134,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, isVarArg)); } @@ -2097,8 +2182,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, SmallVector<CCValAssign, 16> RVLocs; // CCState - Info about the registers and stack slots. - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext(), Call); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext(), Call); // Analyze outgoing return values. 
CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, @@ -2109,6 +2194,10 @@ ARMTargetLowering::LowerReturn(SDValue Chain, RetOps.push_back(Chain); // Operand #0 = Chain (updated below) bool isLittleEndian = Subtarget->isLittle(); + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + AFI->setReturnRegsCount(RVLocs.size()); + // Copy the result values into the output registers. for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); @@ -2227,9 +2316,15 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { if (Copies.count(UseChain.getNode())) // Second CopyToReg Copy = *UI; - else + else { + // We are at the top of this chain. + // If the copy has a glue operand, we conservatively assume it + // isn't safe to perform a tail call. + if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue) + return false; // First CopyToReg TCChain = UseChain; + } } } else if (Copy->getOpcode() == ISD::BITCAST) { // f32 returned in a single GPR. @@ -2238,6 +2333,10 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; + // If the copy has a glue operand, we conservatively assume it isn't safe to + // perform a tail call. + if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) + return false; TCChain = Copy->getOperand(0); } else { return false; @@ -2637,7 +2736,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1)); AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue()); - unsigned Domain = ARM_MB::ISH; + ARM_MB::MemBOpt Domain = ARM_MB::ISH; if (Subtarget->isMClass()) { // Only a full system barrier exists in the M-class architectures. Domain = ARM_MB::SY; @@ -2750,7 +2849,10 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; } - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); ArgRegsSize = NumGPRs * 4; // If parameter is split between stack and GPRs... @@ -2927,8 +3029,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue); + ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext(), Prologue); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); @@ -2977,7 +3079,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } CCInfo.rewindByValRegsInfo(); lastInsIndex = -1; - if (isVarArg) { + if (isVarArg && MFI->hasVAStart()) { unsigned ExtraArgRegsSize; unsigned ExtraArgRegsSaveSize; computeRegArea(CCInfo, MF, CCInfo.getInRegsParamsCount(), 0, @@ -3033,9 +3135,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, else if (RegVT == MVT::v2f64) RC = &ARM::QPRRegClass; else if (RegVT == MVT::i32) - RC = AFI->isThumb1OnlyFunction() ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + RC = AFI->isThumb1OnlyFunction() ? 
&ARM::tGPRRegClass + : &ARM::GPRRegClass; else llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); @@ -3119,7 +3220,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, } // varargs - if (isVarArg) + if (isVarArg && MFI->hasVAStart()) VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(), TotalArgRegsSaveSize); @@ -3141,6 +3242,18 @@ static bool isFloatingPointZero(SDValue Op) { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) return CFP->getValueAPF().isPosZero(); } + } else if (Op->getOpcode() == ISD::BITCAST && + Op->getValueType(0) == MVT::f64) { + // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64) + // created by LowerConstantFP(). + SDValue BitcastOp = Op->getOperand(0); + if (BitcastOp->getOpcode() == ARMISD::VMOVIMM) { + SDValue MoveOp = BitcastOp->getOperand(0); + if (MoveOp->getOpcode() == ISD::TargetConstant && + cast<ConstantSDNode>(MoveOp)->getZExtValue() == 0) { + return true; + } + } } return false; } @@ -3209,6 +3322,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDLoc dl) const { + assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64); SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); @@ -3324,9 +3438,8 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); EVT VT = Op.getValueType(); - return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse, - ARMcc, CCR, OverflowCmp); - + return getCMOV(SDLoc(Op), VT, SelectTrue, SelectFalse, ARMcc, CCR, + OverflowCmp, DAG); } // Convert: @@ -3360,7 +3473,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue CCR = Cond.getOperand(3); SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); assert(True.getValueType() == VT); - return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); + return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG); } } } @@ -3430,6 +3543,32 @@ static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, } } +SDValue ARMTargetLowering::getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, + SDValue TrueVal, SDValue ARMcc, SDValue CCR, + SDValue Cmp, SelectionDAG &DAG) const { + if (Subtarget->isFPOnlySP() && VT == MVT::f64) { + FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), FalseVal); + TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), TrueVal); + + SDValue TrueLow = TrueVal.getValue(0); + SDValue TrueHigh = TrueVal.getValue(1); + SDValue FalseLow = FalseVal.getValue(0); + SDValue FalseHigh = FalseVal.getValue(1); + + SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow, + ARMcc, CCR, Cmp); + SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh, + ARMcc, CCR, duplicateCmp(Cmp, DAG)); + + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High); + } else { + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, + Cmp); + } +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -3439,6 +3578,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue FalseVal = Op.getOperand(3); SDLoc dl(Op); + if (Subtarget->isFPOnlySP() && LHS.getValueType() == 
MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. + if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { // Try to generate VSEL on ARMv8. // The VSEL instruction can't use all the usual ARM condition @@ -3463,8 +3614,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, - Cmp); + return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); } ARMCC::CondCodes CondCode, CondCode2; @@ -3479,12 +3629,18 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // select c, a, b // We only do this in unsafe-fp-math, because signed zeros and NaNs are // handled differently than the original code sequence. - if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && - RHS == FalseVal) { - if (CC == ISD::SETOGT || CC == ISD::SETUGT) - return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); - if (CC == ISD::SETOLT || CC == ISD::SETULT) - return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + if (getTargetMachine().Options.UnsafeFPMath) { + if (LHS == TrueVal && RHS == FalseVal) { + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } else if (LHS == FalseVal && RHS == TrueVal) { + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } } bool swpCmpOps = false; @@ -3503,14 +3659,12 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMcc, CCR, Cmp); + SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG); if (CondCode2 != ARMCC::AL) { SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); - Result = DAG.getNode(ARMISD::CMOV, dl, VT, - Result, TrueVal, ARMcc2, CCR, Cmp2); + Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG); } return Result; } @@ -3643,6 +3797,18 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(4); SDLoc dl(Op); + if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) { + DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC, + dl); + + // If softenSetCCOperands only returned one value, we should compare it to + // zero. 
+ if (!RHS.getNode()) { + RHS = DAG.getConstant(0, LHS.getValueType()); + CC = ISD::SETNE; + } + } + if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); @@ -3735,11 +3901,23 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); } -static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::FP_TO_SINT) + LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -3789,11 +3967,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(Opc, dl, VT, Op); } -static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); + if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) { + RTLIB::Libcall LC; + if (Op.getOpcode() == ISD::SINT_TO_FP) + LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + else + LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), + Op.getValueType()); + return makeLibCall(DAG, LC, Op.getValueType(), &Op.getOperand(0), 1, + /*isSigned*/ false, SDLoc(Op)).first; + } + SDLoc dl(Op); unsigned Opc; @@ -4302,7 +4492,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); SDLoc dl(Op); - if (Op.getOperand(1).getValueType().isFloatingPoint()) { + if (Op1.getValueType().isFloatingPoint()) { switch (SetCCOpcode) { default: llvm_unreachable("Illegal FP comparison"); case ISD::SETUNE: @@ -4566,6 +4756,11 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); + // Use the default (constant pool) lowering for double constants when we have + // an SP-only FPU + if (IsDouble && Subtarget->isFPOnlySP()) + return SDValue(); + // Try splatting with a VMOV.f32... APFloat FPVal = CFP->getValueAPF(); int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); @@ -5744,7 +5939,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // operation legalization where we can't create illegal types. return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - LD->getMemoryVT(), LD->isVolatile(), + LD->getMemoryVT(), LD->isVolatile(), LD->isInvariant(), LD->isNonTemporal(), LD->getAlignment()); } @@ -6099,7 +6294,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // Pair of floats / doubles used to pass the result. - StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + StructType *RetTy = StructType::get(ArgTy, ArgTy, nullptr); // Create stack object for sret. 
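The LowerFP_TO_INT and LowerINT_TO_FP hunks above reroute f64 conversions through libcalls whenever the FPU is single-precision-only. An assumed source-level view of the effect (compiled with, say, -mfpu=vfpv4-sp-d16):

// No double-precision vcvt is available, so the new path emits a call to
// the AEABI helper __aeabi_d2iz instead of expanding in-line.
int truncate_to_int(double d) {
  return (int)d;
}

// The reverse direction similarly becomes a call to __aeabi_i2d.
double widen_from_int(int i) {
  return (double)i;
}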
const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); @@ -6269,6 +6464,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) return LowerDYNAMIC_STACKALLOC(Op, DAG); llvm_unreachable("Don't know how to custom lower this!"); + case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); + case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); } } @@ -6305,7 +6502,8 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, void ARMTargetLowering:: SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6322,9 +6520,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); - const TargetRegisterClass *TRC = isThumb ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; + const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass + : &ARM::GPRRegClass; // Grab constant pool and fixed stack memory operands. MachineMemOperand *CPMMO = @@ -6388,9 +6585,9 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, .addReg(NewVReg2, RegState::Kill) .addReg(NewVReg3, RegState::Kill)); unsigned NewVReg5 = MRI->createVirtualRegister(TRC); - AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) - .addFrameIndex(FI) - .addImm(36)); // &jbuf[1] :: pc + BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5) + .addFrameIndex(FI) + .addImm(36); // &jbuf[1] :: pc AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) .addReg(NewVReg4, RegState::Kill) .addReg(NewVReg5, RegState::Kill) @@ -6420,7 +6617,8 @@ SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, MachineBasicBlock *ARMTargetLowering:: EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); MachineFunction *MF = MBB->getParent(); MachineRegisterInfo *MRI = &MF->getRegInfo(); @@ -6428,9 +6626,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineFrameInfo *MFI = MF->getFrameInfo(); int FI = MFI->getFunctionContextIndex(); - const TargetRegisterClass *TRC = Subtarget->isThumb() ? - (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRnopcRegClass; + const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass + : &ARM::GPRnopcRegClass; // Get a mapping of the call site numbers to all of the landing pads they're // associated with. @@ -6749,16 +6946,14 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { for (std::vector<MachineBasicBlock*>::iterator I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { MachineBasicBlock *CurMBB = *I; - if (SeenMBBs.insert(CurMBB)) + if (SeenMBBs.insert(CurMBB).second) DispContBB->addSuccessor(CurMBB); } // N.B. the order the invoke BBs are processed in doesn't matter here. 
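The SeenMBBs change above tracks SmallPtrSet::insert now returning std::pair<iterator, bool> in the std::set style rather than a plain bool. A minimal sketch of the updated idiom, reusing the surrounding function's variables:

llvm::SmallPtrSet<llvm::MachineBasicBlock *, 64> Seen;
for (llvm::MachineBasicBlock *LP : LPadList)
  if (Seen.insert(LP).second) // .second is true only on first insertion
    DispContBB->addSuccessor(LP);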
const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF); SmallVector<MachineBasicBlock*, 64> MBBLPads; - for (SmallPtrSet<MachineBasicBlock*, 64>::iterator - I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { - MachineBasicBlock *BB = *I; + for (MachineBasicBlock *BB : InvokeBBs) { // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. @@ -6937,7 +7132,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; @@ -6979,14 +7175,11 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, // Select the correct opcode and register class for unit size load/store bool IsNeon = UnitSize >= 8; - TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass - : (const TargetRegisterClass *)&ARM::GPRRegClass; + TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass; if (IsNeon) - VecTRC = UnitSize == 16 - ? (const TargetRegisterClass *)&ARM::DPairRegClass - : UnitSize == 8 - ? (const TargetRegisterClass *)&ARM::DPRRegClass - : nullptr; + VecTRC = UnitSize == 16 ? &ARM::DPairRegClass + : UnitSize == 8 ? &ARM::DPRRegClass + : nullptr; unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -7171,7 +7364,7 @@ MachineBasicBlock * ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, MachineBasicBlock *MBB) const { const TargetMachine &TM = getTargetMachine(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); assert(Subtarget->isTargetWindows() && @@ -7236,7 +7429,8 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = + getTargetMachine().getSubtargetImpl()->getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { @@ -7433,9 +7627,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineRegisterInfo &MRI = Fn->getRegInfo(); // In Thumb mode S must not be specified if source register is the SP or // PC and if destination register is the SP, so restrict register class - unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? - (const TargetRegisterClass*)&ARM::rGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass); + unsigned NewRsbDstReg = + MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass); // Transfer the remainder of BB and its successor edges to sinkMBB. 
SinkBB->splice(SinkBB->begin(), BB, @@ -7489,12 +7682,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SDNode *Node) const { - if (!MI->hasPostISelHook()) { - assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && - "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); - return; - } - const MCInstrDesc *MCID = &MI->getDesc(); // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, // RSC. Coming out of isel, they have an implicit CPSR def, but the optional @@ -7506,8 +7693,8 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // Rename pseudo opcodes. unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); if (NewOpc) { - const ARMBaseInstrInfo *TII = - static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); + const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( + getTargetMachine().getSubtargetImpl()->getInstrInfo()); MCID = &TII->get(NewOpc); assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && @@ -8395,7 +8582,10 @@ static SDValue PerformBFICombine(SDNode *N, unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); unsigned LSB = countTrailingZeros(~InvMask); unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; - unsigned Mask = (1 << Width)-1; + assert(Width < + static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && + "undefined behavior"); + unsigned Mask = (1u << Width) - 1; unsigned Mask2 = N11C->getZExtValue(); if ((Mask & (~Mask2)) == 0) return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), @@ -8408,10 +8598,11 @@ static SDValue PerformBFICombine(SDNode *N, /// PerformVMOVRRDCombine - Target-specific dag combine xforms for /// ARMISD::VMOVRRD. static SDValue PerformVMOVRRDCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // vmovrrd(vmovdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::VMOVDRR) + if (InDouble.getOpcode() == ARMISD::VMOVDRR && !Subtarget->isFPOnlySP()) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); // vmovrrd(load f64) -> (load i32), (load i32) @@ -8442,8 +8633,6 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); - DCI.RemoveFromWorklist(LD); - DAG.DeleteNode(LD); return Result; } @@ -8468,147 +8657,6 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -/// PerformSTORECombine - Target-specific dag combine xforms for -/// ISD::STORE. -static SDValue PerformSTORECombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI) { - StoreSDNode *St = cast<StoreSDNode>(N); - if (St->isVolatile()) - return SDValue(); - - // Optimize trunc store (of multiple scalars) to shuffle and store. First, - // pack all of the elements in one place. Next, store to memory in fewer - // chunks. 
- SDValue StVal = St->getValue(); - EVT VT = StVal.getValueType(); - if (St->isTruncatingStore() && VT.isVector()) { - SelectionDAG &DAG = DCI.DAG; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT StVT = St->getMemoryVT(); - unsigned NumElems = VT.getVectorNumElements(); - assert(StVT != VT && "Cannot truncate to the same type"); - unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); - unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); - - // From, To sizes and ElemCount must be pow of two - if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); - - // We are going to use the original vector elt for storing. - // Accumulated smaller vector elements must be a multiple of the store size. - if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); - - unsigned SizeRatio = FromEltSz / ToEltSz; - assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); - - // Create a type on which we perform the shuffle. - EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), - NumElems*SizeRatio); - assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - - SDLoc DL(St); - SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); - SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) - ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; - - // Can't shuffle using an illegal type. - if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); - - SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, - DAG.getUNDEF(WideVec.getValueType()), - ShuffleVec.data()); - // At this point all of the data is stored at the bottom of the - // register. We now need to save it to mem. - - // Find the largest store unit - MVT StoreType = MVT::i8; - for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; - tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { - MVT Tp = (MVT::SimpleValueType)tp; - if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) - StoreType = Tp; - } - // Didn't find a legal store type. - if (!TLI.isTypeLegal(StoreType)) - return SDValue(); - - // Bitcast the original vector into a vector of store-size units - EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), - StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); - assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); - SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); - SmallVector<SDValue, 8> Chains; - SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, - TLI.getPointerTy()); - SDValue BasePtr = St->getBasePtr(); - - // Perform one or more big stores into memory. - unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); - for (unsigned I = 0; I < E; I++) { - SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - StoreType, ShuffWide, - DAG.getIntPtrConstant(I)); - SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, - St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); - BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, - Increment); - Chains.push_back(Ch); - } - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); - } - - if (!ISD::isNormalStore(St)) - return SDValue(); - - // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and - // ARM stores of arguments in the same cache line. 
- if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && - StVal.getNode()->hasOneUse()) { - SelectionDAG &DAG = DCI.DAG; - bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); - SDLoc DL(St); - SDValue BasePtr = St->getBasePtr(); - SDValue NewST1 = DAG.getStore(St->getChain(), DL, - StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), - BasePtr, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment()); - - SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, - DAG.getConstant(4, MVT::i32)); - return DAG.getStore(NewST1.getValue(0), DL, - StVal.getNode()->getOperand(isBigEndian ? 0 : 1), - OffsetPtr, St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), - std::min(4U, St->getAlignment() / 2)); - } - - if (StVal.getValueType() != MVT::i64 || - StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) - return SDValue(); - - // Bitcast an i64 store extracted from a vector to f64. - // Otherwise, the i64 value will be legalized to a pair of i32 values. - SelectionDAG &DAG = DCI.DAG; - SDLoc dl(StVal); - SDValue IntVec = StVal.getOperand(0); - EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, - IntVec.getValueType().getVectorNumElements()); - SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); - SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, - Vec, StVal.getOperand(1)); - dl = SDLoc(N); - SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); - // Make the DAGCombiner fold the bitcasts. - DCI.AddToWorklist(Vec.getNode()); - DCI.AddToWorklist(ExtElt.getNode()); - DCI.AddToWorklist(V.getNode()); - return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), - St->getPointerInfo(), St->isVolatile(), - St->isNonTemporal(), St->getAlignment(), - St->getTBAAInfo()); -} - /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node /// are normal, non-volatile loads. If so, it is profitable to bitcast an /// i64 vector to have f64 elements, since the value can then be loaded @@ -8626,7 +8674,8 @@ static bool hasNormalLoadOperand(SDNode *N) { /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for /// ISD::BUILD_VECTOR. static SDValue PerformBUILD_VECTORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI){ + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value // into a pair of GPRs, which is fine when the value is used as a scalar, @@ -8828,17 +8877,18 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { DAG.getUNDEF(VT), NewMask.data()); } -/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and -/// NEON load/store intrinsics to merge base address updates. +/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP, +/// NEON load/store intrinsics, and generic vector load/stores, to merge +/// base address updates. +/// For generic load/stores, the memory type is assumed to be a vector. +/// The caller is assumed to have checked legality. static SDValue CombineBaseUpdate(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { - if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) - return SDValue(); - SelectionDAG &DAG = DCI.DAG; bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || N->getOpcode() == ISD::INTRINSIC_W_CHAIN); - unsigned AddrOpIdx = (isIntrinsic ? 
2 : 1); + bool isStore = N->getOpcode() == ISD::STORE; + unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1); SDValue Addr = N->getOperand(AddrOpIdx); // Search for a use of the address operand that is an increment. @@ -8899,6 +8949,10 @@ static SDValue CombineBaseUpdate(SDNode *N, case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; + case ISD::LOAD: NewOpc = ARMISD::VLD1_UPD; + NumVecs = 1; isLaneOp = false; break; + case ISD::STORE: NewOpc = ARMISD::VST1_UPD; + NumVecs = 1; isLoad = false; isLaneOp = false; break; } } @@ -8906,8 +8960,11 @@ static SDValue CombineBaseUpdate(SDNode *N, EVT VecTy; if (isLoad) VecTy = N->getValueType(0); - else + else if (isIntrinsic) VecTy = N->getOperand(AddrOpIdx+1).getValueType(); + else + VecTy = N->getOperand(1).getValueType(); + unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; if (isLaneOp) NumBytes /= VecTy.getVectorNumElements(); @@ -8924,25 +8981,70 @@ static SDValue CombineBaseUpdate(SDNode *N, continue; } + EVT AlignedVecTy = VecTy; + + // If this is a less-than-standard-aligned load/store, change the type to + // match the standard alignment. + // The alignment is overlooked when selecting _UPD variants, and it's + // easier to introduce bitcasts here than fix that. + // There are 3 ways to get to this base-update combine: + // - intrinsics: they are assumed to be properly aligned (to the standard + // alignment of the memory type), so we don't need to do anything. + // - ARMISD::VLDx nodes: they are only generated from the aforementioned + // intrinsics, so, likewise, there's nothing to do. + // - generic load/store instructions: the alignment is specified as an + // explicit operand, rather than implicitly as the standard alignment + // of the memory type (like the intrinsics). We need to change the + // memory type to match the explicit alignment. That way, we don't + // generate non-standard-aligned ARMISD::VLDx nodes. + if (LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N)) { + unsigned Alignment = LSN->getAlignment(); + if (Alignment == 0) + Alignment = 1; + if (Alignment < VecTy.getScalarSizeInBits() / 8) { + MVT EltTy = MVT::getIntegerVT(Alignment * 8); + assert(NumVecs == 1 && "Unexpected multi-element generic load/store."); + assert(!isLaneOp && "Unexpected generic load/store lane."); + unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8); + AlignedVecTy = MVT::getVectorVT(EltTy, NumElts); + } + } + // Create the new updating load/store node. + // First, create an SDVTList for the new updating node's results. EVT Tys[6]; unsigned NumResultVecs = (isLoad ? NumVecs : 0); unsigned n; for (n = 0; n < NumResultVecs; ++n) - Tys[n] = VecTy; + Tys[n] = AlignedVecTy; Tys[n++] = MVT::i32; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2)); + SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs+2)); + + // Then, gather the new node's operands.
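Where the intrinsic cases merge a pointer increment into VLDx_UPD/VSTx_UPD nodes, the new ISD::LOAD and ISD::STORE cases extend the combine to plain vector memory accesses. An assumed illustration of code that now becomes eligible for post-incremented vld1/vst1 selection:

#include <arm_neon.h>

void copy_vectors(const int32_t *src, int32_t *dst, int n) {
  for (int i = 0; i < n; i += 4) {
    int32x4_t v = vld1q_s32(src + i); // candidate for vld1.32 {...}, [r]!
    vst1q_s32(dst + i, v);            // candidate for vst1.32 {...}, [r]!
  }
}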
SmallVector<SDValue, 8> Ops; Ops.push_back(N->getOperand(0)); // incoming chain Ops.push_back(N->getOperand(AddrOpIdx)); Ops.push_back(Inc); - for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { - Ops.push_back(N->getOperand(i)); + if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) { + // Try to match the intrinsic's signature. + Ops.push_back(StN->getValue()); + Ops.push_back(DAG.getConstant(StN->getAlignment(), MVT::i32)); + } else { + for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) + Ops.push_back(N->getOperand(i)); } - MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); + + // If this is a non-standard-aligned store, the penultimate operand is the + // stored value. Bitcast it to the aligned type. + if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) { + SDValue &StVal = Ops[Ops.size()-2]; + StVal = DAG.getNode(ISD::BITCAST, SDLoc(N), AlignedVecTy, StVal); + } + + MemSDNode *MemInt = cast<MemSDNode>(N); SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, - Ops, MemInt->getMemoryVT(), + Ops, AlignedVecTy, MemInt->getMemOperand()); // Update the uses. @@ -8950,6 +9052,14 @@ static SDValue CombineBaseUpdate(SDNode *N, for (unsigned i = 0; i < NumResultVecs; ++i) { NewResults.push_back(SDValue(UpdN.getNode(), i)); } + + // If this is a non-standard-aligned load, the first result is the loaded + // value. Bitcast it to the expected result type. + if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) { + SDValue &LdVal = NewResults[0]; + LdVal = DAG.getNode(ISD::BITCAST, SDLoc(N), VecTy, LdVal); + } + NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain DCI.CombineTo(N, NewResults); DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); @@ -8959,6 +9069,14 @@ static SDValue CombineBaseUpdate(SDNode *N, return SDValue(); } +static SDValue PerformVLDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + return CombineBaseUpdate(N, DCI); +} + /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic /// are also VDUPLANEs. If so, combine them to a vldN-dup operation and @@ -9011,7 +9129,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (n = 0; n < NumVecs; ++n) Tys[n] = VT; Tys[n] = MVT::Other; - SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1)); + SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumVecs+1)); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, @@ -9072,6 +9190,164 @@ static SDValue PerformVDUPLANECombine(SDNode *N, return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } +static SDValue PerformLOADCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + + // If this is a legal vector load, try to combine it into a VLD1_UPD. + if (ISD::isNormalLoad(N) && VT.isVector() && + DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return CombineBaseUpdate(N, DCI); + + return SDValue(); } + +/// PerformSTORECombine - Target-specific dag combine xforms for +/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + StoreSDNode *St = cast<StoreSDNode>(N); + if (St->isVolatile()) + return SDValue(); + + // Optimize trunc store (of multiple scalars) to shuffle and store. First, + // pack all of the elements in one place. Next, store to memory in fewer + // chunks. + SDValue StVal = St->getValue(); + EVT VT = StVal.getValueType(); + if (St->isTruncatingStore() && VT.isVector()) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT StVT = St->getMemoryVT(); + unsigned NumElems = VT.getVectorNumElements(); + assert(StVT != VT && "Cannot truncate to the same type"); + unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); + unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); + + // From, To sizes and ElemCount must be pow of two + if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); + + // We are going to use the original vector elt for storing. + // Accumulated smaller vector elements must be a multiple of the store size. + if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); + + unsigned SizeRatio = FromEltSz / ToEltSz; + assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); + + // Create a type on which we perform the shuffle. + EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), + NumElems*SizeRatio); + assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); + + SDLoc DL(St); + SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); + SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; + + // Can't shuffle using an illegal type. + if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); + + SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, + DAG.getUNDEF(WideVec.getValueType()), + ShuffleVec.data()); + // At this point all of the data is stored at the bottom of the + // register. We now need to save it to mem. + + // Find the largest store unit + MVT StoreType = MVT::i8; + for (MVT Tp : MVT::integer_valuetypes()) { + if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) + StoreType = Tp; + } + // Didn't find a legal store type. + if (!TLI.isTypeLegal(StoreType)) + return SDValue(); + + // Bitcast the original vector into a vector of store-size units + EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), + StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); + assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); + SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); + SmallVector<SDValue, 8> Chains; + SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, + TLI.getPointerTy()); + SDValue BasePtr = St->getBasePtr(); + + // Perform one or more big stores into memory. 
+ unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); + for (unsigned I = 0; I < E; I++) { + SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + StoreType, ShuffWide, + DAG.getIntPtrConstant(I)); + SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + Increment); + Chains.push_back(Ch); + } + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } + + if (!ISD::isNormalStore(St)) + return SDValue(); + + // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and + // ARM stores of arguments in the same cache line. + if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && + StVal.getNode()->hasOneUse()) { + SelectionDAG &DAG = DCI.DAG; + bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian(); + SDLoc DL(St); + SDValue BasePtr = St->getBasePtr(); + SDValue NewST1 = DAG.getStore(St->getChain(), DL, + StVal.getNode()->getOperand(isBigEndian ? 1 : 0 ), + BasePtr, St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + + SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, + DAG.getConstant(4, MVT::i32)); + return DAG.getStore(NewST1.getValue(0), DL, + StVal.getNode()->getOperand(isBigEndian ? 0 : 1), + OffsetPtr, St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), + std::min(4U, St->getAlignment() / 2)); + } + + if (StVal.getValueType() == MVT::i64 && + StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + + // Bitcast an i64 store extracted from a vector to f64. + // Otherwise, the i64 value will be legalized to a pair of i32 values. + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(StVal); + SDValue IntVec = StVal.getOperand(0); + EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, + IntVec.getValueType().getVectorNumElements()); + SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); + SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, + Vec, StVal.getOperand(1)); + dl = SDLoc(N); + SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(Vec.getNode()); + DCI.AddToWorklist(ExtElt.getNode()); + DCI.AddToWorklist(V.getNode()); + return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment(), + St->getAAInfo()); + } + + // If this is a legal vector store, try to combine it into a VST1_UPD. + if (ISD::isNormalStore(N) && VT.isVector() && + DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return CombineBaseUpdate(N, DCI); + + return SDValue(); +} + // isConstVecPow2 - Return true if each vector element is a power of 2, all // elements are the same constant, C, and Log2(C) ranges from 1 to 32. static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) @@ -9128,16 +9404,18 @@ static SDValue PerformVCVTCombine(SDNode *N, MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); - if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32 || + NumLanes > 4) { // These instructions only exist converting from f32 to i32. We can handle // smaller integers by generating an extra truncate, but larger ones would - // be lossy. + // be lossy. 
We also can't handle more than 4 lanes, since these instructions + // only support v2i32/v4i32 types. return SDValue(); } unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; - unsigned NumLanes = Op.getValueType().getVectorNumElements(); SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, @@ -9641,10 +9919,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); - case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); + case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); case ISD::STORE: return PerformSTORECombine(N, DCI); - case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); + case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget); case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); @@ -9660,10 +9938,11 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); + case ISD::LOAD: return PerformLOADCombine(N, DCI); case ARMISD::VLD2DUP: case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: - return CombineBaseUpdate(N, DCI); + return PerformVLDCombine(N, DCI); case ARMISD::BUILD_VECTOR: return PerformARMBUILD_VECTORCombine(N, DCI); case ISD::INTRINSIC_VOID: @@ -9683,7 +9962,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case Intrinsic::arm_neon_vst2lane: case Intrinsic::arm_neon_vst3lane: case Intrinsic::arm_neon_vst4lane: - return CombineBaseUpdate(N, DCI); + return PerformVLDCombine(N, DCI); default: break; } break; @@ -9696,8 +9975,10 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); } -bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, unsigned, - bool *Fast) const { +bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, + bool *Fast) const { // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs bool AllowsUnaligned = Subtarget->allowsUnalignedMem(); @@ -9751,11 +10032,12 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || - (allowsUnalignedMemoryAccesses(MVT::v2f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, 1, &Fast) && Fast))) { return MVT::v2f64; } else if (Size >= 8 && (memOpAlign(SrcAlign, DstAlign, 8) || - (allowsUnalignedMemoryAccesses(MVT::f64, 0, &Fast) && Fast))) { + (allowsMisalignedMemoryAccesses(MVT::f64, 0, 1, &Fast) && + Fast))) { return MVT::f64; } } @@ -10358,6 +10640,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return RCPair(0U, &ARM::hGPRRegClass); break; case 'r': + if (Subtarget->isThumb1Only()) + return RCPair(0U, &ARM::tGPRRegClass); return RCPair(0U, &ARM::GPRRegClass); case 'w': if (VT == MVT::Other) @@ -10562,7 +10846,7 @@ SDValue
ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); unsigned Opcode = Op->getOpcode(); assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && - "Invalid opcode for Div/Rem lowering"); + "Invalid opcode for Div/Rem lowering"); bool isSigned = (Opcode == ISD::SDIVREM); EVT VT = Op->getValueType(0); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); @@ -10570,10 +10854,10 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { RTLIB::Libcall LC; switch (VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); - case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; - case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; - case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; - case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; } SDValue InChain = DAG.getEntryNode(); @@ -10593,7 +10877,7 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); - Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); + Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr); SDLoc dl(Op); TargetLowering::CallLoweringInfo CLI(DAG); @@ -10631,6 +10915,31 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, DL); } +SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::f64 && Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_EXTEND"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + +SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOperand(0).getValueType() == MVT::f64 && + Subtarget->isFPOnlySP() && + "Unexpected type for custom-lowering FP_ROUND"); + + RTLIB::Libcall LC; + LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType()); + + SDValue SrcVal = Op.getOperand(0); + return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, + /*isSigned*/ false, SDLoc(Op)).first; +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -10658,7 +10967,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { return false; if (VT == MVT::f32) return ARM_AM::getFP32Imm(Imm) != -1; - if (VT == MVT::f64) + if (VT == MVT::f64 && !Subtarget->isFPOnlySP()) return ARM_AM::getFP64Imm(Imm) != -1; return false; } @@ -10785,31 +11094,154 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { - // Loads and stores less than 64-bits are already atomic; ones above that - // are doomed anyway, so defer to the default libcall and blame the OS when - // things go wrong. 
Cortex M doesn't have ldrexd/strexd though, so don't emit - anything for those. - bool IsMClass = Subtarget->isMClass(); - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { - unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); - return Size == 64 && !IsMClass; - } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { - return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; +bool ARMTargetLowering::hasLoadLinkedStoreConditional() const { return true; } + +Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder, + ARM_MB::MemBOpt Domain) const { + Module *M = Builder.GetInsertBlock()->getParent()->getParent(); + + // First, if the target has no DMB, see what fallback we can use. + if (!Subtarget->hasDataBarrier()) { + // Some ARMv6 cpus can support data barriers with an mcr instruction. + // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get + // here. + if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) { + Function *MCR = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_mcr); + Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0), + Builder.getInt32(0), Builder.getInt32(7), + Builder.getInt32(10), Builder.getInt32(5)}; + return Builder.CreateCall(MCR, args); + } else { + // Instead of using barriers, atomic accesses on these subtargets use + // libcalls. + llvm_unreachable("makeDMB on a target so old that it has no barriers"); + } + } else { + Function *DMB = llvm::Intrinsic::getDeclaration(M, Intrinsic::arm_dmb); + // Only a full system barrier exists in the M-class architectures. + Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain; + Constant *CDomain = Builder.getInt32(Domain); + return Builder.CreateCall(DMB, CDomain); + } +} + +// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html +Instruction* ARMTargetLowering::emitLeadingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (!getInsertFencesForAtomic()) + return nullptr; + + switch (Ord) { + case NotAtomic: + case Unordered: + llvm_unreachable("Invalid fence: unordered/non-atomic"); + case Monotonic: + case Acquire: + return nullptr; // Nothing to do + case SequentiallyConsistent: + if (!IsStore) + return nullptr; // Nothing to do + /*FALLTHROUGH*/ + case Release: + case AcquireRelease: + if (Subtarget->isSwift()) + return makeDMB(Builder, ARM_MB::ISHST); + // FIXME: add a comment with a link to documentation justifying this. + else + return makeDMB(Builder, ARM_MB::ISH); + } + llvm_unreachable("Unknown fence ordering in emitLeadingFence"); +} + +Instruction* ARMTargetLowering::emitTrailingFence(IRBuilder<> &Builder, + AtomicOrdering Ord, bool IsStore, + bool IsLoad) const { + if (!getInsertFencesForAtomic()) + return nullptr; + + switch (Ord) { + case NotAtomic: + case Unordered: + llvm_unreachable("Invalid fence: unordered/non-atomic"); + case Monotonic: + case Release: + return nullptr; // Nothing to do + case Acquire: + case AcquireRelease: + case SequentiallyConsistent: + return makeDMB(Builder, ARM_MB::ISH); } + llvm_unreachable("Unknown fence ordering in emitTrailingFence"); +} + +// Loads and stores less than 64 bits are already atomic; ones above that +// are doomed anyway, so defer to the default libcall and blame the OS when +// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit +// anything for those.
+bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return (Size == 64) && !Subtarget->isMClass(); +} + +// Loads and stores less than 64 bits are already atomic; ones above that +// are doomed anyway, so defer to the default libcall and blame the OS when +// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit +// anything for those. +// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that +// guarantee, see DDI0406C ARM architecture reference manual, +// sections A8.8.72-74 LDRD) +bool ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { + unsigned Size = LI->getType()->getPrimitiveSizeInBits(); + return (Size == 64) && !Subtarget->isMClass(); +} + +// For the real atomic operations, we have ldrex/strex up to 32 bits, +// and up to 64 bits on the non-M profiles. +bool ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); + return Size <= (Subtarget->isMClass() ? 32U : 64U); +} + +// This has so far only been implemented for MachO. +bool ARMTargetLowering::useLoadStackGuardNode() const { + return Subtarget->isTargetMachO(); +} + +bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx, + unsigned &Cost) const { + // If we do not have NEON, vector types are not natively supported. + if (!Subtarget->hasNEON()) + return false; + + // Floating point values and vector values map to the same register file. + // Therefore, although we could do a store extract of a vector type, it is + // better to leave them as floats, as we have more freedom in the addressing + // mode for those. + if (VectorTy->isFPOrFPVectorTy()) + return false; + + // If the index is unknown at compile time, this is very expensive to lower + // and it is not possible to combine the store with the extract. + if (!isa<ConstantInt>(Idx)) + return false; - // For the real atomic operations, we have ldrex/strex up to 32 bits, - // and up to 64 bits on the non-M profiles - unsigned AtomicLimit = IsMClass ? 32 : 64; - return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; + assert(VectorTy->isVectorTy() && "VectorTy is not a vector type"); + unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth(); + // We can do a store + vector extract on any vector that fits perfectly in a D + // or Q register. + if (BitWidth == 64 || BitWidth == 128) { + Cost = 0; + return true; + } + return false; } Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); Type *ValTy = cast<PointerType>(Addr->getType())->getElementType(); - bool IsAcquire = - Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent; + bool IsAcquire = isAtLeastAcquire(Ord); // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd // intrinsic must return {i32, i32} and we have to recombine them into a @@ -10845,8 +11277,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); - bool IsRelease = - Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent; + bool IsRelease = isAtLeastRelease(Ord); // Since the intrinsics must have legal type, the i64 intrinsics take two // parameters: "i32, i32".
We must marshal Val into the appropriate form @@ -10944,6 +11375,6 @@ bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters( HABaseType Base = HA_UNKNOWN; uint64_t Members = 0; bool result = isHomogeneousAggregate(Ty, Base, Members); - DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n"); + DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump()); return result; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 1ace0f330f1a..89b0c31ac52e 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMISELLOWERING_H -#define ARMISELLOWERING_H +#ifndef LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H +#define LLVM_LIB_TARGET_ARM_ARMISELLOWERING_H #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -232,7 +232,7 @@ namespace llvm { class ARMTargetLowering : public TargetLowering { public: - explicit ARMTargetLowering(TargetMachine &TM); + explicit ARMTargetLowering(const TargetMachine &TM); unsigned getJumpTableEncoding() const override; @@ -266,11 +266,12 @@ namespace llvm { bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override; - /// allowsUnalignedMemoryAccesses - Returns true if the target allows + /// allowsMisalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses of the specified type. Returns whether it /// is "fast" by reference in the second argument. - bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace, - bool *Fast) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, + unsigned Align, + bool *Fast) const override; EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, @@ -391,12 +392,26 @@ namespace llvm { bool functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + bool hasLoadLinkedStoreConditional() const override; + Instruction *makeDMB(IRBuilder<> &Builder, ARM_MB::MemBOpt Domain) const; Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override; Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override; - bool shouldExpandAtomicInIR(Instruction *Inst) const override; + Instruction* emitLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + bool IsStore, bool IsLoad) const override; + Instruction* emitTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord, + bool IsStore, bool IsLoad) const override; + + bool shouldExpandAtomicLoadInIR(LoadInst *LI) const override; + bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override; + bool shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override; + + bool useLoadStackGuardNode() const override; + + bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, + unsigned &Cost) const override; protected: std::pair<const TargetRegisterClass*, uint8_t> @@ -473,6 +488,10 @@ namespace llvm { SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; unsigned getRegisterByName(const char* 
RegName, EVT VT) const override; @@ -562,6 +581,9 @@ namespace llvm { bool mayBeEmittedAsTailCall(CallInst *CI) const override; + SDValue getCMOV(SDLoc dl, EVT VT, SDValue FalseVal, SDValue TrueVal, + SDValue ARMcc, SDValue CCR, SDValue Cmp, + SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 59e9260b81ea..7d27cf3fcdcb 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -203,6 +203,16 @@ def msr_mask : Operand<i32> { let ParserMatchClass = MSRMaskOperand; } +def BankedRegOperand : AsmOperandClass { + let Name = "BankedReg"; + let ParserMethod = "parseBankedRegOperand"; +} +def banked_reg : Operand<i32> { + let PrintMethod = "printBankedRegOperand"; + let DecoderMethod = "DecodeBankedReg"; + let ParserMatchClass = BankedRegOperand; +} + // Shift Right Immediate - A shift right immediate is encoded differently from // other shift immediates. The imm6 field is encoded like so: // diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index f235ac225848..17d1ffaa9ff0 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -90,6 +90,49 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } +void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const { + MachineFunction &MF = *MI->getParent()->getParent(); + const ARMSubtarget &Subtarget = MF.getTarget().getSubtarget<ARMSubtarget>(); + + if (!Subtarget.useMovt(MF)) { + if (RM == Reloc::PIC_) + expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_pcrel, ARM::LDRi12, RM); + else + expandLoadStackGuardBase(MI, ARM::LDRLIT_ga_abs, ARM::LDRi12, RM); + return; + } + + if (RM != Reloc::PIC_) { + expandLoadStackGuardBase(MI, ARM::MOVi32imm, ARM::LDRi12, RM); + return; + } + + const GlobalValue *GV = + cast<GlobalValue>((*MI->memoperands_begin())->getValue()); + + if (!Subtarget.GVIsIndirectSymbol(GV, RM)) { + expandLoadStackGuardBase(MI, ARM::MOV_ga_pcrel, ARM::LDRi12, RM); + return; + } + + MachineBasicBlock &MBB = *MI->getParent(); + DebugLoc DL = MI->getDebugLoc(); + unsigned Reg = MI->getOperand(0).getReg(); + MachineInstrBuilder MIB; + + MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg) + .addGlobalAddress(GV, 0, ARMII::MO_NONLAZY); + unsigned Flag = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant; + MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand( + MachinePointerInfo::getGOT(), Flag, 4, 4); + MIB.addMemOperand(MMO); + MIB = BuildMI(MBB, MI, DL, get(ARM::LDRi12), Reg); + MIB.addReg(Reg, RegState::Kill).addImm(0); + MIB.setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + AddDefaultPred(MIB); +} + namespace { /// ARMCGBR - Create Global Base Reg pass. This initializes the PIC /// global base register for ARM ELF. 
@@ -113,8 +156,9 @@ namespace { ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); - unsigned Align = TM->getDataLayout() - ->getPrefTypeAlignment(Type::getInt32PtrTy(*Context)); + unsigned Align = + TM->getSubtargetImpl()->getDataLayout()->getPrefTypeAlignment( + Type::getInt32PtrTy(*Context)); unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); MachineBasicBlock &FirstMBB = MF.front(); @@ -124,7 +168,7 @@ namespace { MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); unsigned Opc = TM->getSubtarget<ARMSubtarget>().isThumb2() ? ARM::t2LDRpci : ARM::LDRcp; - const TargetInstrInfo &TII = *TM->getInstrInfo(); + const TargetInstrInfo &TII = *TM->getSubtargetImpl()->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), TempReg) .addConstantPoolIndex(Idx); diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index b09958aaa950..90f34ea08401 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMINSTRUCTIONINFO_H -#define ARMINSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H +#define LLVM_LIB_TARGET_ARM_ARMINSTRINFO_H #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" @@ -37,6 +37,10 @@ public: /// always be able to get register info as well (through this method). /// const ARMRegisterInfo &getRegisterInfo() const override { return RI; } + +private: + void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const override; }; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index f0e145a394b2..126c5529c152 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -241,6 +241,9 @@ def HasDB : Predicate<"Subtarget->hasDataBarrier()">, def HasMP : Predicate<"Subtarget->hasMPExtension()">, AssemblerPredicate<"FeatureMP", "mp-extensions">; +def HasVirtualization: Predicate<"false">, + AssemblerPredicate<"FeatureVirtualization", + "virtualization-extensions">; def HasTrustZone : Predicate<"Subtarget->hasTrustZone()">, AssemblerPredicate<"FeatureTrustZone", "TrustZone">; @@ -260,8 +263,6 @@ def IsNotMClass : Predicate<"!Subtarget->isMClass()">, "!armv*m">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb", "arm-mode">; -def IsIOS : Predicate<"Subtarget->isTargetIOS()">; -def IsNotIOS : Predicate<"!Subtarget->isTargetIOS()">; def IsMachO : Predicate<"Subtarget->isTargetMachO()">; def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; @@ -330,24 +331,6 @@ def imm16_31 : ImmLeaf<i32, [{ return (int32_t)Imm >= 16 && (int32_t)Imm < 32; }]>; -def so_imm_neg_asmoperand : AsmOperandClass { let Name = "ARMSOImmNeg"; } -def so_imm_neg : Operand<i32>, PatLeaf<(imm), [{ - unsigned Value = -(unsigned)N->getZExtValue(); - return Value && ARM_AM::getSOImmVal(Value) != -1; - }], imm_neg_XFORM> { - let ParserMatchClass = so_imm_neg_asmoperand; -} - -// Note: this pattern doesn't require an encoder method and such, as it's -// only used on aliases (Pat<> and InstAlias<>). The actual encoding -// is handled by the destination instructions, which use so_imm. 
-def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } -def so_imm_not : Operand<i32>, PatLeaf<(imm), [{ - return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; - }], imm_not_XFORM> { - let ParserMatchClass = so_imm_not_asmoperand; -} - // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. def sext_16_node : PatLeaf<(i32 GPR:$a), [{ return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17; @@ -527,7 +510,7 @@ def shift_imm : Operand<i32> { let ParserMatchClass = ShifterImmAsmOperand; } -// shifter_operand operands: so_reg_reg, so_reg_imm, and so_imm. +// shifter_operand operands: so_reg_reg, so_reg_imm, and mod_imm. def ShiftedRegAsmOperand : AsmOperandClass { let Name = "RegShiftedReg"; } def so_reg_reg : Operand<i32>, // reg reg imm ComplexPattern<i32, 3, "SelectRegShifterOperand", @@ -572,27 +555,43 @@ def shift_so_reg_imm : Operand<i32>, // reg reg imm let MIOperandInfo = (ops GPR, i32imm); } - -// so_imm - Match a 32-bit shifter_operand immediate operand, which is an -// 8-bit immediate rotated by an arbitrary number of bits. -def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; } -def so_imm : Operand<i32>, ImmLeaf<i32, [{ +// mod_imm: match a 32-bit immediate operand, which can be encoded into +// a 12-bit immediate; an 8-bit integer and a 4-bit rotator (See ARMARM +// - "Modified Immediate Constants"). Within the MC layer we keep this +// immediate in its encoded form. +def ModImmAsmOperand: AsmOperandClass { + let Name = "ModImm"; + let ParserMethod = "parseModImm"; +} +def mod_imm : Operand<i32>, ImmLeaf<i32, [{ return ARM_AM::getSOImmVal(Imm) != -1; }]> { - let EncoderMethod = "getSOImmOpValue"; - let ParserMatchClass = SOImmAsmOperand; - let DecoderMethod = "DecodeSOImmOperand"; + let EncoderMethod = "getModImmOpValue"; + let PrintMethod = "printModImmOperand"; + let ParserMatchClass = ModImmAsmOperand; } -// Break so_imm's up into two pieces. This handles immediates with up to 16 -// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to -// get the first/second pieces. -def so_imm2part : PatLeaf<(imm), [{ - return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); -}]>; +// Note: the patterns mod_imm_not and mod_imm_neg do not require an encoder +// method and such, as they are only used on aliases (Pat<> and InstAlias<>). +// The actual parsing, encoding, and decoding are handled by the destination +// instructions, which use mod_imm. -/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true. -/// +def ModImmNotAsmOperand : AsmOperandClass { let Name = "ModImmNot"; } +def mod_imm_not : Operand<i32>, PatLeaf<(imm), [{ + return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; + }], imm_not_XFORM> { + let ParserMatchClass = ModImmNotAsmOperand; +} + +def ModImmNegAsmOperand : AsmOperandClass { let Name = "ModImmNeg"; } +def mod_imm_neg : Operand<i32>, PatLeaf<(imm), [{ + unsigned Value = -(unsigned)N->getZExtValue(); + return Value && ARM_AM::getSOImmVal(Value) != -1; + }], imm_neg_XFORM> { + let ParserMatchClass = ModImmNegAsmOperand; +} + +/// arm_i32imm - True for +V6T2, or when isSOImmTwoPartVal() is true. def arm_i32imm : PatLeaf<(imm), [{ if (Subtarget->useMovt(*MF)) return true; @@ -633,6 +632,8 @@ def imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm == 32; }]> { let ParserMatchClass = Imm32AsmOperand; } +def imm8_or_16 : ImmLeaf<i32, [{ return Imm == 8 || Imm == 16;}]>; + /// imm1_7 predicate - Immediate in the range [1,7].
def Imm1_7AsmOperand: ImmAsmOperand { let Name = "Imm1_7"; } def imm1_7 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 8; }]> { @@ -1199,7 +1200,7 @@ include "ARMInstrFormats.td" // Multiclass helpers... // -/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a +/// AsI1_bin_irs - Defines a set of (op r, {mod_imm|r|so_reg}) patterns for a /// binop that produces a value. let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AsI1_bin_irs<bits<4> opcod, string opc, @@ -1208,9 +1209,9 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { - def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>, + [(set GPR:$Rd, (opnode GPR:$Rn, mod_imm:$imm))]>, Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; @@ -1281,9 +1282,9 @@ multiclass AsI1_rbin_irs<bits<4> opcod, string opc, // The register-immediate version is re-materializable. This is useful // in particular for taking the address of a local. let isReMaterializable = 1 in { - def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (opnode so_imm:$imm, GPR:$Rn))]>, + [(set GPR:$Rd, (opnode mod_imm:$imm, GPR:$Rn))]>, Sched<[WriteALU, ReadALU]> { bits<4> Rd; bits<4> Rn; @@ -1351,9 +1352,9 @@ let hasPostISelHook = 1, Defs = [CPSR] in { multiclass AsI1_bin_s_irs<InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, bit Commutable = 0> { - def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm))]>, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm))]>, Sched<[WriteALU, ReadALU]>; def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, pred:$p), @@ -1384,9 +1385,9 @@ let hasPostISelHook = 1, Defs = [CPSR] in { multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, bit Commutable = 0> { - def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm, pred:$p), + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm, pred:$p), 4, iii, - [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn))]>, + [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn))]>, Sched<[WriteALU, ReadALU]>; def rsi : ARMPseudoInst<(outs GPR:$Rd), @@ -1405,16 +1406,16 @@ multiclass AsI1_rbin_s_is<InstrItinClass iii, InstrItinClass iir, } } -/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test +/// AI1_cmp_irs - Defines a set of (op r, {mod_imm|r|so_reg}) cmp / test /// patterns. Similar to AsI1_bin_irs except the instruction does not produce /// an explicit result; it only implicitly sets CPSR.
let isCompare = 1, Defs = [CPSR] in { multiclass AI1_cmp_irs<bits<4> opcod, string opc, InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, PatFrag opnode, bit Commutable = 0> { - def ri : AI1<opcod, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, iii, + def ri : AI1<opcod, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, iii, opc, "\t$Rn, $imm", - [(opnode GPR:$Rn, so_imm:$imm)]>, + [(opnode GPR:$Rn, mod_imm:$imm)]>, Sched<[WriteCMP, ReadALU]> { bits<4> Rn; bits<12> imm; @@ -1542,9 +1543,9 @@ let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { - def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, so_imm:$imm, CPSR))]>, + [(set GPR:$Rd, CPSR, (opnode GPR:$Rn, mod_imm:$imm, CPSR))]>, Requires<[IsARM]>, Sched<[WriteALU, ReadALU]> { bits<4> Rd; @@ -1612,9 +1613,9 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let TwoOperandAliasConstraint = "$Rn = $Rd" in multiclass AI1_rsc_irs<bits<4> opcod, string opc, PatFrag opnode> { let hasPostISelHook = 1, Defs = [CPSR], Uses = [CPSR] in { - def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, CPSR, (opnode so_imm:$imm, GPR:$Rn, CPSR))]>, + [(set GPR:$Rd, CPSR, (opnode mod_imm:$imm, GPR:$Rn, CPSR))]>, Requires<[IsARM]>, Sched<[WriteALU, ReadALU]> { bits<4> Rd; @@ -1808,7 +1809,7 @@ multiclass AI_str1nopc<bit isByte, string opc, InstrItinClass iii, /// the function. The first operand is the ID# for this instruction, the second /// is the index into the MachineConstantPool that this is, the third is the /// size in bytes of this constant pool entry. -let neverHasSideEffects = 1, isNotDuplicable = 1 in +let hasSideEffects = 0, isNotDuplicable = 1 in def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size), NoItinerary, []>; @@ -1961,7 +1962,7 @@ def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, } def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", - []>, Requires<[IsARM, HasV7]> { + [(int_arm_dbg imm0_15:$opt)]>, Requires<[IsARM, HasV7]> { bits<4> opt; let Inst{27-4} = 0b001100100000111100001111; let Inst{3-0} = opt; @@ -2052,7 +2053,7 @@ def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), // LEApcrel - Load a pc-relative address into a register without offending the // assembler. -let neverHasSideEffects = 1, isReMaterializable = 1 in +let hasSideEffects = 0, isReMaterializable = 1 in // The 'adr' mnemonic encodes differently if the label is before or after // the instruction. The {24-21} opcode bits are set by the fixup, as we don't // know until then which form of the instruction will be used. @@ -2382,6 +2383,33 @@ def RFEIB_UPD : RFEI<1, "rfeib\t$Rn!"> { let Inst{24-23} = 0b11; } +// Hypervisor Call is a system instruction. +let isCall = 1 in { +def HVC : AInoP< (outs), (ins imm0_65535:$imm), BrFrm, NoItinerary, + "hvc", "\t$imm", []>, + Requires<[IsARM, HasVirtualization]> { + bits<16> imm; + + // Even though HVC isn't predicable, its encoding includes a condition field.
+ // The instruction is undefined if the condition field is 0xf; otherwise it is + // unpredictable if it isn't condition AL (0xe). + let Inst{31-28} = 0b1110; + let Unpredictable{31-28} = 0b1111; + let Inst{27-24} = 0b0001; + let Inst{23-20} = 0b0100; + let Inst{19-8} = imm{15-4}; + let Inst{7-4} = 0b0111; + let Inst{3-0} = imm{3-0}; +} +} + +// Return from exception in Hypervisor mode. +let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in +def ERET : ABI<0b0001, (outs), (ins), NoItinerary, "eret", "", []>, + Requires<[IsARM, HasVirtualization]> { + let Inst{23-0} = 0b011000000000000001101110; +} + //===----------------------------------------------------------------------===// // Load / Store Instructions. // @@ -2399,7 +2427,7 @@ defm STRB : AI_str1nopc<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; // Special LDR for loads from non-pc-relative constpools. -let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, +let canFoldAsLoad = 1, mayLoad = 1, hasSideEffects = 0, isReMaterializable = 1, isCodeGenOnly = 1 in def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr), AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr", @@ -2426,7 +2454,7 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>, @@ -2503,7 +2531,7 @@ multiclass AI2_ldridx<bit isByte, string opc, } -let mayLoad = 1, neverHasSideEffects = 1 in { +let mayLoad = 1, hasSideEffects = 0 in { // FIXME: for LDR_PRE_REG etc. the itinerary should be either IIC_iLoad_ru or // IIC_iLoad_siu depending on whether the offset register is shifted. defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>; @@ -2539,7 +2567,7 @@ multiclass AI3_ldridx<bits<4> op, string opc, InstrItinClass itin> { } } -let mayLoad = 1, neverHasSideEffects = 1 in { +let mayLoad = 1, hasSideEffects = 0 in { defm LDRH : AI3_ldridx<0b1011, "ldrh", IIC_iLoad_bh_ru>; defm LDRSH : AI3_ldridx<0b1111, "ldrsh", IIC_iLoad_bh_ru>; defm LDRSB : AI3_ldridx<0b1101, "ldrsb", IIC_iLoad_bh_ru>; @@ -2572,10 +2600,10 @@ def LDRD_POST: AI3ldstidx<0b1101, 0, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), let DecoderMethod = "DecodeAddrMode3Instruction"; } } // hasExtraDefRegAllocReq = 1 -} // mayLoad = 1, neverHasSideEffects = 1 +} // mayLoad = 1, hasSideEffects = 0 // LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT.
-let mayLoad = 1, neverHasSideEffects = 1 in { +let mayLoad = 1, hasSideEffects = 0 in { def LDRT_POST_REG : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$addr, am2offset_reg:$offset), IndexModePost, LdFrm, IIC_iLoad_ru, @@ -2694,7 +2722,7 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>; // Store doubleword -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$Rt2, addrmode3:$addr), StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", []>, Requires<[IsARM, HasV5TE]> { @@ -2767,7 +2795,7 @@ multiclass AI2_stridx<bit isByte, string opc, } } -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, hasSideEffects = 0 in { // FIXME: for STR_PRE_REG etc. the itinerary should be either IIC_iStore_ru or // IIC_iStore_siu depending on whether the offset register is shifted. defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>; @@ -2830,7 +2858,8 @@ def STRH_preidx: ARMPseudoInst<(outs GPR:$Rn_wb), def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { + "strh", "\t$Rt, $addr!", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []> { bits<14> addr; let Inst{23} = addr{8}; // U bit let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm @@ -2843,7 +2872,8 @@ def STRH_PRE : AI3ldstidx<0b1011, 0, 1, (outs GPR:$Rn_wb), def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addr_offset_none:$addr, am3offset:$offset), IndexModePost, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", + "strh", "\t$Rt, $addr, $offset", + "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, addr_offset_none:$addr, am3offset:$offset))]> { @@ -2857,7 +2887,7 @@ def STRH_POST : AI3ldstidx<0b1011, 0, 0, (outs GPR:$Rn_wb), let DecoderMethod = "DecodeAddrMode3Instruction"; } -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { def STRD_PRE : AI3ldstidx<0b1111, 0, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rt2, addrmode3_pre:$addr), IndexModePre, StMiscFrm, IIC_iStore_d_ru, @@ -2887,7 +2917,7 @@ def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb), let Inst{3-0} = offset{3-0}; // imm3_0/Rm let DecoderMethod = "DecodeAddrMode3Instruction"; } -} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 // STRT, STRBT, and STRHT @@ -2931,7 +2961,7 @@ def STRBT_POST : ARMAsmPseudo<"strbt${q} $Rt, $addr", (ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>; -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, hasSideEffects = 0 in { def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), IndexModePost, StFrm, IIC_iStore_ru, @@ -3096,17 +3126,18 @@ multiclass arm_ldst_mult<string asm, string sfx, bit L_bit, bit P_bit, Format f, } } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in defm LDM : arm_ldst_mult<"ldm", "", 1, 0, LdStMulFrm, IIC_iLoad_m, - IIC_iLoad_mu>; + IIC_iLoad_mu>, ComplexDeprecationPredicate<"ARMLoad">; let mayStore = 1,
hasExtraSrcRegAllocReq = 1 in defm STM : arm_ldst_mult<"stm", "", 0, 0, LdStMulFrm, IIC_iStore_m, - IIC_iStore_mu>; + IIC_iStore_mu>, + ComplexDeprecationPredicate<"ARMStore">; -} // neverHasSideEffects +} // hasSideEffects // FIXME: remove when we have a way to mark an MI with these properties. // FIXME: Should pc be an implicit operand like PICADD, etc? @@ -3132,7 +3163,7 @@ defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, // Move Instructions. // -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; @@ -3146,7 +3177,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, } // A version for the smaller set of tail call registers. -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; @@ -3190,8 +3221,8 @@ def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi, - "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP, +def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, IIC_iMOVi, + "mov", "\t$Rd, $imm", [(set GPR:$Rd, mod_imm:$imm)]>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<12> imm; @@ -3401,10 +3432,10 @@ defm RSC : AI1_rsc_irs<0b0111, "rsc", // assume opposite meanings of the carry flag (i.e., carry == !borrow). // See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory // details. -def : ARMPat<(add GPR:$src, so_imm_neg:$imm), - (SUBri GPR:$src, so_imm_neg:$imm)>; -def : ARMPat<(ARMaddc GPR:$src, so_imm_neg:$imm), - (SUBSri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(add GPR:$src, mod_imm_neg:$imm), + (SUBri GPR:$src, mod_imm_neg:$imm)>; +def : ARMPat<(ARMaddc GPR:$src, mod_imm_neg:$imm), + (SUBSri GPR:$src, mod_imm_neg:$imm)>; def : ARMPat<(add GPR:$src, imm0_65535_neg:$imm), (SUBrr GPR:$src, (MOVi16 (imm_neg_XFORM imm:$imm)))>, @@ -3416,10 +3447,11 @@ def : ARMPat<(ARMaddc GPR:$src, imm0_65535_neg:$imm), // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation.
-def : ARMPat<(ARMadde GPR:$src, so_imm_not:$imm, CPSR), - (SBCri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(ARMadde GPR:$src, mod_imm_not:$imm, CPSR), + (SBCri GPR:$src, mod_imm_not:$imm)>; def : ARMPat<(ARMadde GPR:$src, imm0_65535_neg:$imm, CPSR), - (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>; + (SBCrr GPR:$src, (MOVi16 (imm_not_XFORM imm:$imm)))>, + Requires<[IsARM, HasV6T2]>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -3697,9 +3729,9 @@ def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift), let Inst{3-0} = shift{3-0}; } let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in -def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, +def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins mod_imm:$imm), DPFrm, IIC_iMVNi, "mvn", "\t$Rd, $imm", - [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> { + [(set GPR:$Rd, mod_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<12> imm; let Inst{25} = 1; @@ -3708,8 +3740,8 @@ def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, let Inst{11-0} = imm; } -def : ARMPat<(and GPR:$src, so_imm_not:$imm), - (BICri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(and GPR:$src, mod_imm_not:$imm), + (BICri GPR:$src, mod_imm_not:$imm)>; //===----------------------------------------------------------------------===// // Multiply Instructions. @@ -3803,7 +3835,7 @@ def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), } // Extra precision multiplies with low / high results -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let isCommutable = 1 in { def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, @@ -3870,7 +3902,7 @@ def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), Requires<[IsARM, NoV6]>; } -} // neverHasSideEffects +} // hasSideEffects // Most significant word multiply def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), @@ -3934,14 +3966,12 @@ multiclass AI_smul<string opc, PatFrag opnode> { def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (sra (opnode GPR:$Rn, - (sext_inreg GPR:$Rm, i16)), (i32 16)))]>, + []>, Requires<[IsARM, HasV5TE]>; def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", - [(set GPR:$Rd, (sra (opnode GPR:$Rn, - (sra GPR:$Rm, (i32 16))), (i32 16)))]>, + []>, Requires<[IsARM, HasV5TE]>; } @@ -3983,17 +4013,13 @@ multiclass AI_smla<string opc, PatFrag opnode> { def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, - (sext_inreg GPRnopc:$Rm, i16)), (i32 16))))]>, + []>, Requires<[IsARM, HasV5TE, UseMulOps]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set GPRnopc:$Rd, - (add GPR:$Ra, (sra (opnode GPRnopc:$Rn, - (sra GPRnopc:$Rm, (i32 16))), (i32 16))))]>, + []>, Requires<[IsARM, HasV5TE, UseMulOps]>; } } @@ -4113,7 +4139,7 @@ def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, // Misc. Arithmetic Instructions. 
// -def CLZ : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm), +def CLZ : AMiscA1I<0b00010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "clz", "\t$Rd, $Rm", [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>, Sched<[WriteALU]>; @@ -4240,8 +4266,8 @@ defm CMP : AI1_cmp_irs<0b1010, "cmp", BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>; // ARMcmpZ can re-use the above instruction definitions. -def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm), - (CMPri GPR:$src, so_imm:$imm)>; +def : ARMPat<(ARMcmpZ GPR:$src, mod_imm:$imm), + (CMPri GPR:$src, mod_imm:$imm)>; def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs), (CMPrr GPR:$src, GPR:$rhs)>; def : ARMPat<(ARMcmpZ GPR:$src, so_reg_imm:$rhs), @@ -4251,9 +4277,9 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs), // CMN register-integer let isCompare = 1, Defs = [CPSR] in { -def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi, +def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, mod_imm:$imm), DPFrm, IIC_iCMPi, "cmn", "\t$Rn, $imm", - [(ARMcmn GPR:$Rn, so_imm:$imm)]>, + [(ARMcmn GPR:$Rn, mod_imm:$imm)]>, Sched<[WriteCMP, ReadALU]> { bits<4> Rn; bits<12> imm; @@ -4326,11 +4352,11 @@ def CMNzrsr : AI1<0b1011, (outs), } -def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm), - (CMNri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(ARMcmp GPR:$src, mod_imm_neg:$imm), + (CMNri GPR:$src, mod_imm_neg:$imm)>; -def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), - (CMNri GPR:$src, so_imm_neg:$imm)>; +def : ARMPat<(ARMcmpZ GPR:$src, mod_imm_neg:$imm), + (CMNri GPR:$src, mod_imm_neg:$imm)>; // Note that TST/TEQ don't set all the same flags that CMP does! defm TST : AI1_cmp_irs<0b1000, "tst", @@ -4357,7 +4383,7 @@ def BCCZi64 : PseudoInst<(outs), // Conditional moves -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let isCommutable = 1, isSelect = 1 in def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), @@ -4394,9 +4420,9 @@ def MOVCCi16 let isMoveImm = 1 in def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm, cmovpred:$p), + (ins GPR:$false, mod_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, + [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm:$imm, cmovpred:$p))]>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; @@ -4412,13 +4438,13 @@ def MOVCCi32imm let isMoveImm = 1 in def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm, cmovpred:$p), + (ins GPR:$false, mod_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, - [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, + [(set GPR:$Rd, (ARMcmov GPR:$false, mod_imm_not:$imm, cmovpred:$p))]>, RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// @@ -4631,7 +4657,7 @@ def : ARMPat<(stlex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), class acquiring_load<PatFrag base> : PatFrag<(ops node:$ptr), (base node:$ptr), [{ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Acquire || Ordering == SequentiallyConsistent; + return isAtLeastAcquire(Ordering); }]>; def atomic_load_acquire_8 : acquiring_load<atomic_load_8>; @@ -4641,7 +4667,7 @@ def atomic_load_acquire_32 : acquiring_load<atomic_load_32>; class releasing_store<PatFrag base> : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering(); - return Ordering == Release || Ordering == SequentiallyConsistent; + return isAtLeastRelease(Ordering); }]>; 
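A note on the acquiring_load/releasing_store predicates above: like emitLoadLinked/emitStoreConditional earlier in this patch, they now defer to the isAtLeastAcquire/isAtLeastRelease helpers instead of comparing orderings explicitly. A minimal sketch of the semantics this patch assumes for those helpers, mirroring the explicit comparisons being deleted (where the helpers are actually declared is not visible in this diff):

  // Sketch only: assumed behavior of the ordering predicates used above,
  // matching the comparisons they replace in emitLoadLinked and
  // emitStoreConditional.
  static bool isAtLeastAcquire(AtomicOrdering Ord) {
    // Every ordering at least as strong as an acquire barrier.
    return Ord == Acquire || Ord == AcquireRelease ||
           Ord == SequentiallyConsistent;
  }
  static bool isAtLeastRelease(AtomicOrdering Ord) {
    // Every ordering at least as strong as a release barrier.
    return Ord == Release || Ord == AcquireRelease ||
           Ord == SequentiallyConsistent;
  }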
def atomic_store_release_8 : releasing_store<atomic_store_8>; @@ -5062,12 +5088,31 @@ def MRSsys : ABI<0b0001, (outs GPRnopc:$Rd), (ins), NoItinerary, let Unpredictable{11-0} = 0b110100001111; } +// However, the MRS (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5). +def MRSbanked : ABI<0b0001, (outs GPRnopc:$Rd), (ins banked_reg:$banked), + NoItinerary, "mrs", "\t$Rd, $banked", []>, + Requires<[IsARM, HasVirtualization]> { + bits<6> banked; + bits<4> Rd; + + let Inst{23} = 0; + let Inst{22} = banked{5}; // R bit + let Inst{21-20} = 0b00; + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = Rd; + let Inst{11-9} = 0b001; + let Inst{8} = banked{4}; + let Inst{7-0} = 0b00000000; +} + // Move from ARM core register to Special Register // -// No need to have both system and application versions, the encodings are the -// same and the assembly parser has no way to distinguish between them. The mask -// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains -// the mask with the fields to be accessed in the special register. +// No need to have both system and application versions of MSR (immediate) or +// MSR (register), the encodings are the same and the assembly parser has no way +// to distinguish between them. The mask operand contains the special register +// (R Bit) in bit 4 and bits 3-0 contain the mask with the fields to be +// accessed in the special register. def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, "msr", "\t$mask, $Rn", []> { bits<5> mask; @@ -5082,17 +5127,36 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, let Inst{3-0} = Rn; } -def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, - "msr", "\t$mask, $a", []> { +def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, mod_imm:$imm), NoItinerary, + "msr", "\t$mask, $imm", []> { bits<5> mask; - bits<12> a; + bits<12> imm; let Inst{23} = 0; let Inst{22} = mask{4}; // R bit let Inst{21-20} = 0b10; let Inst{19-16} = mask{3-0}; let Inst{15-12} = 0b1111; - let Inst{11-0} = a; + let Inst{11-0} = imm; +} + +// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5). +def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn), + NoItinerary, "msr", "\t$banked, $Rn", []>, + Requires<[IsARM, HasVirtualization]> { + bits<6> banked; + bits<4> Rn; + + let Inst{23} = 0; + let Inst{22} = banked{5}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-9} = 0b001; + let Inst{8} = banked{4}; + let Inst{7-4} = 0b0000; + let Inst{3-0} = Rn; } // Dynamic stack allocation yields a _chkstk for Windows targets. These calls @@ -5164,7 +5228,7 @@ let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), NoItinerary, [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsARM, IsIOS]>; + Requires<[IsARM]>; } // eh.sjlj.dispatchsetup pseudo-instruction. @@ -5188,7 +5252,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in // Large immediate handling. -// 32-bit immediate using two piece so_imms or movw + movt. +// 32-bit immediate using two piece mod_imms or movw + movt. // This is a single pseudo instruction, the benefit is that it can be remat'd // as a single unit instead of having to handle reg inputs. // FIXME: Remove this when we can do generalized remat.
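The so_imm to mod_imm renaming in these hunks adopts the ARM ARM's term "modified immediate": an 8-bit constant rotated right by an even amount. A hedged sketch of the validity test (the helper names are illustrative; LLVM's real logic lives in ARMAddressingModes.h):

#include <cstdint>

// Rotate left, guarding against the undefined shift-by-32 case.
static uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return N ? ((V << N) | (V >> (32 - N))) : V;
}

// V is an A32 modified immediate if rotating it left by some even amount
// yields a value that fits in 8 bits (undoing the even right-rotation).
static bool isModImm(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2)
    if (rotl32(V, Rot) <= 0xFF)
      return true;
  return false;
}

A constant such as 0x00AB00CD fails this test, which is exactly when the MOVi32imm pseudo described above falls back to a two-piece expansion or a movw + movt pair.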
@@ -5217,6 +5281,7 @@ def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), (ARMWrapperPIC tglobaladdr:$addr))]>, Requires<[IsARM, DontUseMovt]>; +let AddedComplexity = 10 in def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), NoItinerary, [(set GPR:$dst, @@ -5280,11 +5345,6 @@ def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), (SMULTB GPR:$a, GPR:$b)>; def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), (SMULTB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16)), - (SMULWB GPR:$a, GPR:$b)>; -def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), - (SMULWB GPR:$a, GPR:$b)>; def : ARMV5MOPat<(add GPR:$acc, (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), @@ -5307,13 +5367,6 @@ def : ARMV5MOPat<(add GPR:$acc, def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, - (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), - (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; -def : ARMV5MOPat<(add GPR:$acc, - (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), - (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; // Pre-v7 uses MCR for synchronization barriers. @@ -5491,36 +5544,36 @@ def : MnemonicAlias<"uqsubaddx", "uqsax">; // USAX == USUBADDX def : MnemonicAlias<"usubaddx", "usax">; -// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like +// "mov Rd, mod_imm_not" can be handled via "mvn" in assembly, just like // for isel. def : ARMInstAlias<"mov${s}${p} $Rd, $imm", - (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; + (MVNi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", - (MOVi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; + (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Same for AND <--> BIC def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", - (ANDri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", - (ANDri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", - (BICri rGPR:$Rd, rGPR:$Rn, so_imm_not:$imm, + (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"and${s}${p} $Rdn, $imm", - (BICri rGPR:$Rdn, rGPR:$Rdn, so_imm_not:$imm, + (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; -// Likewise, "add Rd, so_imm_neg" -> sub +// Likewise, "add Rd, mod_imm_neg" -> sub def : ARMInstAlias<"add${s}${p} $Rd, $Rn, $imm", - (SUBri GPR:$Rd, GPR:$Rn, so_imm_neg:$imm, pred:$p, cc_out:$s)>; + (SUBri GPR:$Rd, GPR:$Rn, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"add${s}${p} $Rd, $imm", - (SUBri GPR:$Rd, GPR:$Rd, so_imm_neg:$imm, pred:$p, cc_out:$s)>; -// Same for CMP <--> CMN via so_imm_neg + (SUBri GPR:$Rd, GPR:$Rd, mod_imm_neg:$imm, pred:$p, cc_out:$s)>; +// Same for CMP <--> CMN via mod_imm_neg def : ARMInstAlias<"cmp${p} $Rd, $imm", - (CMNri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; + (CMNri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; def : ARMInstAlias<"cmn${p} $Rd, $imm", - (CMPri rGPR:$Rd, so_imm_neg:$imm, pred:$p)>; + (CMPri rGPR:$Rd, mod_imm_neg:$imm, pred:$p)>; // The shifter forms of the MOV instruction are aliased to the ASR, LSL, // LSR, ROR, and RRX instructions. @@ -5593,3 +5646,8 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", // is discarded. 
def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>, ComplexDeprecationPredicate<"IT">; + +let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in +def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn), + NoItinerary, + [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c02bb3b55f5b..2a7b4b57fd08 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -34,6 +34,14 @@ def nImmSplatI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; let ParserMatchClass = nImmSplatI32AsmOperand; } +def nImmSplatNotI16AsmOperand : AsmOperandClass { let Name = "NEONi16splatNot"; } +def nImmSplatNotI16 : Operand<i32> { + let ParserMatchClass = nImmSplatNotI16AsmOperand; +} +def nImmSplatNotI32AsmOperand : AsmOperandClass { let Name = "NEONi32splatNot"; } +def nImmSplatNotI32 : Operand<i32> { + let ParserMatchClass = nImmSplatNotI32AsmOperand; +} def nImmVMOVI32AsmOperand : AsmOperandClass { let Name = "NEONi32vmov"; } def nImmVMOVI32 : Operand<i32> { let PrintMethod = "printNEONModImmOperand"; @@ -657,7 +665,7 @@ class VLDQQQQWBPseudo<InstrItinClass itin> (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, "$addr.addr = $wb, $src = $dst">; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode> @@ -1015,7 +1023,7 @@ def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo<IIC_VLD4u>; -} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 // Classes for VLD*LN pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation.
@@ -1098,7 +1106,7 @@ def : Pat<(vector_insert (v4f32 QPR:$src), (f32 (load addrmode6:$addr)), imm:$lane), (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: class VLD1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -1351,7 +1359,7 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>; -} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 // VLD1DUP : Vector Load (single element to all lanes) class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp, @@ -1397,7 +1405,7 @@ def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load, def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))), (VLD1DUPq32 addrmode6:$addr)>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<1, 0b10, 0b1100, op7_4, @@ -1601,9 +1609,9 @@ def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>; -} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { // Classes for VST* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -2017,7 +2025,7 @@ def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>; -} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 // Classes for VST*LN pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. 
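VLD1DUP implements the load-one-element-and-splat idiom that the NEONvdup-of-load pattern above selects. As a usage illustration (a hypothetical function, compiled for an ARM target with NEON enabled):

#include <arm_neon.h>

// Loads a single float and replicates it into all four lanes; this should
// select vld1.32 {dN[]}, [rM] instead of a scalar load plus vdup.
float32x4_t splat_load(const float *p) {
  return vld1q_dup_f32(p);
}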
@@ -2121,7 +2129,7 @@ def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>; def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16,NEONvgetlaneu>; def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>; -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in { // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt> @@ -2343,7 +2351,7 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>; -} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 +} // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 // Use vld1/vst1 for unaligned f64 load / store def : Pat<(f64 (hword_alignedload addrmode6:$addr)), @@ -4376,7 +4384,7 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, "vqdmlsl", "s", null_frag>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b0111, "vqdmlsl", "s", null_frag>; def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), @@ -5429,7 +5437,7 @@ def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, IIC_VMOVSI, "vmov", "32", "$R, $V$lane", [(set GPR:$R, (extractelt (v2i32 DPR:$V), imm:$lane))]>, - Requires<[HasNEON, HasFastVGETLNi32]> { + Requires<[HasVFP2, HasFastVGETLNi32]> { let Inst{21} = lane{0}; } // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -5497,8 +5505,12 @@ def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), (ins DPR:$src1, GPR:$R, VectorIndex32:$lane), IIC_VMOVISL, "vmov", "32", "$V$lane, $R", [(set DPR:$V, (insertelt (v2i32 DPR:$src1), - GPR:$R, imm:$lane))]> { + GPR:$R, imm:$lane))]>, + Requires<[HasVFP2]> { let Inst{21} = lane{0}; + // This instruction is equivalent to + // $V = INSERT_SUBREG $src1, $R, translateImmToSubIdx($imm) + let isInsertSubreg = 1; } } def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), @@ -6635,6 +6647,16 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRd DPR:$Vdn, DPR:$Vdn, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm", (VORRq QPR:$Vdn, QPR:$Vdn, QPR:$Vm, pred:$p)>; +// ... immediates +def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", + (VBICiv4i16 DPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", + (VBICiv2i32 DPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i16 $Vd, $imm", + (VBICiv8i16 QPR:$Vd, nImmSplatNotI16:$imm, pred:$p)>; +def : NEONInstAlias<"vand${p}.i32 $Vd, $imm", + (VBICiv4i32 QPR:$Vd, nImmSplatNotI32:$imm, pred:$p)>; + // VLD1 single-lane pseudo-instructions. These need special handling for // the lane index that an InstAlias can't handle, so we use these instead.
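The new vand.i16/.i32 aliases just above accept an immediate only when its bitwise NOT is a legal VBIC splat immediate, and then emit the VBIC encoding. A rough sketch of the acceptance test implied by the NEONi16splatNot operand class, simplified to the one-significant-byte i16 splats (not the actual assembler-parser code):

#include <cstdint>

// "vand.i16 d0, #imm" has no encoding of its own: the assembler inverts the
// immediate and emits "vbic.i16 d0, #~imm" when ~imm is a valid i16 splat,
// i.e. one of its two bytes is zero.
static bool matchVANDi16(uint16_t Imm, uint16_t &BicImm) {
  uint16_t Inverted = uint16_t(~Imm);
  if ((Inverted & 0xFF00) == 0 || (Inverted & 0x00FF) == 0) {
    BicImm = Inverted; // becomes the VBIC modified immediate
    return true;
  }
  return false;
}

For example, vand.i16 d0, #0xfffe inverts to #0x0001 and assembles as vbic.i16 d0, #0x0001.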
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index e17f73af03ec..cc953c637cb4 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -311,7 +311,7 @@ def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", } def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end", - []>, T1Encoding<0b101101>, Deprecated<HasV8Ops> { + []>, T1Encoding<0b101101>, Requires<[IsNotMClass]>, Deprecated<HasV8Ops> { bits<1> end; // A8.6.156 let Inst{9-5} = 0b10010; @@ -360,6 +360,14 @@ def tADDrSPi : T1pI<(outs tGPR:$dst), (ins GPRsp:$sp, t_imm0_1020s4:$imm), let DecoderMethod = "DecodeThumbAddSpecialReg"; } +// Thumb1 frame lowering is rather fragile; we hope to be able to use +// tADDrSPi, but we may need to insert a sequence that clobbers CPSR. +def tADDframe : PseudoInst<(outs tGPR:$dst), (ins i32imm:$base, i32imm:$offset), + NoItinerary, []>, + Requires<[IsThumb, IsThumb1Only]> { + let Defs = [CPSR]; +} + // ADD sp, sp, #<imm7> def tADDspi : T1pIt<(outs GPRsp:$Rdn), (ins GPRsp:$Rn, t_imm0_508s4:$imm), IIC_iALUi, "add", "\t$Rdn, $imm", []>, @@ -466,7 +474,7 @@ let isCall = 1, (outs), (ins pred:$p, t_blxtarget:$func), IIC_Br, "blx${p}\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsThumb, HasV5T]>, Sched<[WriteBrL]> { + Requires<[IsThumb, HasV5T, IsNotMClass]>, Sched<[WriteBrL]> { bits<24> func; let Inst{26} = func{23}; let Inst{25-16} = func{20-11}; @@ -706,7 +714,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, // // These require base address to be written back or one of the loaded regs. -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def tLDMIA : T1I<(outs), (ins tGPR:$Rn, pred:$p, reglist:$regs, variable_ops), @@ -746,7 +754,7 @@ def tSTMIA_UPD : Thumb1I<(outs GPR:$wb), let Inst{7-0} = regs; } -} // neverHasSideEffects +} // hasSideEffects def : InstAlias<"ldm${p} $Rn!, $regs", (tLDMIA tGPR:$Rn, pred:$p, reglist:$regs)>, @@ -880,7 +888,7 @@ def tADDrr : // A8.6.6 T1 "add", "\t$Rd, $Rn, $Rm", [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr, "add", "\t$Rdn, $Rm", []>, T1Special<{0,0,?,?}>, Sched<[WriteALU]> { @@ -1040,7 +1048,7 @@ def : tInstAlias <"movs $Rdn, $imm", // A7-73: MOV(2) - mov setting flag.
-let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone, 2, IIC_iMOVr, "mov", "\t$Rd, $Rm", "", []>, @@ -1062,7 +1070,7 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr, let Inst{5-3} = Rm; let Inst{2-0} = Rd; } -} // neverHasSideEffects +} // hasSideEffects // Multiply register let isCommutable = 1 in @@ -1240,7 +1248,7 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p), let DecoderMethod = "DecodeThumbAddSpecialReg"; } -let neverHasSideEffects = 1, isReMaterializable = 1 in +let hasSideEffects = 0, isReMaterializable = 1 in def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p), 2, IIC_iALUi, []>, Sched<[WriteALU]>; @@ -1289,7 +1297,7 @@ def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch), AddrModeNone, 0, IndexModeNone, Pseudo, NoItinerary, "", "", [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>, - Requires<[IsThumb, IsIOS]>; + Requires<[IsThumb]>; //===----------------------------------------------------------------------===// // Non-Instruction Patterns @@ -1355,7 +1363,7 @@ def : T1Pat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>, Requires<[IsThumb]>; def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>, - Requires<[IsThumb, HasV5T]>; + Requires<[IsThumb, HasV5T, IsNotMClass]>; // Indirect calls to ARM routines def : Tv5Pat<(ARMcall GPR:$dst), (tBLXr GPR:$dst)>, diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 85e93516807b..5e41ea1c294d 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1241,7 +1241,7 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd), let DecoderMethod = "DecodeT2Adr"; } -let neverHasSideEffects = 1, isReMaterializable = 1 in +let hasSideEffects = 0, isReMaterializable = 1 in def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p), 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; let hasSideEffects = 1 in @@ -1262,22 +1262,22 @@ defm t2LDR : T2I_ld<0, 0b10, "ldr", IIC_iLoad_i, IIC_iLoad_si, GPR, // Loads with zero extension defm t2LDRH : T2I_ld<0, 0b01, "ldrh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(zextloadi16 node:$Src)>>; + GPRnopc, UnOpFrag<(zextloadi16 node:$Src)>>; defm t2LDRB : T2I_ld<0, 0b00, "ldrb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(zextloadi8 node:$Src)>>; + GPRnopc, UnOpFrag<(zextloadi8 node:$Src)>>; // Loads with sign extension defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(sextloadi16 node:$Src)>>; + GPRnopc, UnOpFrag<(sextloadi16 node:$Src)>>; defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", IIC_iLoad_bh_i, IIC_iLoad_bh_si, - GPR, UnOpFrag<(sextloadi8 node:$Src)>>; + GPRnopc, UnOpFrag<(sextloadi8 node:$Src)>>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { +let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // Load doubleword def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_imm8s4:$addr), IIC_iLoad_d_i, "ldrd", "\t$Rt, $Rt2, $addr", "", []>; -} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 +} // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 // zextload i1 -> zextload i8 def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr), @@ -1326,7 +1326,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)), // Indexed loads -let mayLoad = 1, neverHasSideEffects = 1 in { +let mayLoad = 1, hasSideEffects = 0 in { def t2LDR_PRE : T2Ipreldst<0, 0b10, 1, 
1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iLoad_iu, @@ -1378,7 +1378,7 @@ def t2LDRSH_POST : T2Ipostldst<1, 0b01, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, "ldrsh", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; -} // mayLoad = 1, neverHasSideEffects = 1 +} // mayLoad = 1, hasSideEffects = 0 // LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110). // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 @@ -1443,14 +1443,14 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si, rGPR, BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>; // Store doubleword -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in +let mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 in def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr), IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>; // Indexed stores -let mayStore = 1, neverHasSideEffects = 1 in { +let mayStore = 1, hasSideEffects = 0 in { def t2STR_PRE : T2Ipreldst<0, 0b10, 0, 1, (outs GPRnopc:$Rn_wb), (ins GPRnopc:$Rt, t2addrmode_imm8_pre:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, @@ -1468,7 +1468,7 @@ def t2STRB_PRE : T2Ipreldst<0, 0b00, 0, 1, (outs GPRnopc:$Rn_wb), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, "strb", "\t$Rt, $addr!", "$addr.base = $Rn_wb,@earlyclobber $Rn_wb", []>; -} // mayStore = 1, neverHasSideEffects = 1 +} // mayStore = 1, hasSideEffects = 0 def t2STR_POST : T2Ipostldst<0, 0b10, 0, 0, (outs GPRnopc:$Rn_wb), (ins GPRnopc:$Rt, addr_offset_none:$Rn, @@ -1763,7 +1763,7 @@ multiclass thumb2_ld_mult<string asm, InstrItinClass itin, } } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in defm t2LDM : thumb2_ld_mult<"ldm", IIC_iLoad_m, IIC_iLoad_mu, 1>; @@ -1848,14 +1848,14 @@ multiclass thumb2_st_mult<string asm, InstrItinClass itin, let mayStore = 1, hasExtraSrcRegAllocReq = 1 in defm t2STM : thumb2_st_mult<"stm", IIC_iStore_m, IIC_iStore_mu, 0>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // Move Instructions. // -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def t2MOVr : T2sTwoReg<(outs GPRnopc:$Rd), (ins GPR:$Rm), IIC_iMOVr, "mov", ".w\t$Rd, $Rm", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11101; @@ -1973,6 +1973,16 @@ def t2SXTAH : T2I_exta_rrot<0b000, "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">; +// A simple right-shift can also be used in most cases (the exception is the +// SXTH operation with a rotate of 24: there the non-contiguous bits are +// relevant).
+def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)), + (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)), + (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; + // Zero extenders let AddedComplexity = 16 in { @@ -1999,8 +2009,16 @@ def t2UXTAB : T2I_exta_rrot<0b101, "uxtab", def t2UXTAH : T2I_exta_rrot<0b001, "uxtah", BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">; + +def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)), + (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; +def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)), + (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>, + Requires<[HasT2ExtractPack, IsThumb2]>; } + //===----------------------------------------------------------------------===// // Arithmetic Instructions. // @@ -2554,7 +2572,7 @@ def t2MLS: T2FourReg< } // Extra precision multiplies with low / high results -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let isCommutable = 1 in { def t2SMULL : T2MulLong<0b000, 0b0000, (outs rGPR:$RdLo, rGPR:$RdHi), @@ -2585,7 +2603,7 @@ def t2UMAAL : T2MulLong<0b110, 0b0110, (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, Requires<[IsThumb2, HasThumb2DSP]>; -} // neverHasSideEffects +} // hasSideEffects // Rounding variants of the below included for disassembly only @@ -2708,8 +2726,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, - (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>, + []>, Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -2721,8 +2738,7 @@ multiclass T2I_smul<string opc, PatFrag opnode> { def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (sra (opnode rGPR:$Rn, - (sra rGPR:$Rm, (i32 16))), (i32 16)))]>, + []>, Requires<[IsThumb2, HasThumb2DSP]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -2791,8 +2807,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WB : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, - (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>, + []>, Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -2804,8 +2819,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> { def WT : T2FourReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", - [(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn, - (sra rGPR:$Rm, (i32 16))), (i32 16))))]>, + []>, Requires<[IsThumb2, HasThumb2DSP, UseMulOps]> { let Inst{31-27} = 0b11111; let Inst{26-23} = 0b0110; @@ -3136,7 +3150,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq", BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>>; // Conditional moves -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let isCommutable = 1, isSelect = 1 in def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd), @@ -3199,7 +3213,7 @@ def t2MOVCCi32imm RegConstraint<"$false = $dst">; } // isCodeGenOnly = 1 -} // 
neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // Atomic operations intrinsics @@ -3291,7 +3305,8 @@ def t2LDREXD : T2I_ldrex<0b0111, (outs rGPR:$Rt, rGPR:$Rt2), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", - [], {?, ?, ?, ?}> { + [], {?, ?, ?, ?}>, + Requires<[IsThumb2, IsNotMClass]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3367,7 +3382,8 @@ def t2STREXD : T2I_strex<0b0111, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rt2, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], - {?, ?, ?, ?}> { + {?, ?, ?, ?}>, + Requires<[IsThumb2, IsNotMClass]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3614,7 +3630,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), // Branch and Exchange Jazelle -- for disassembly only // Rm = Inst{19-16} def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func", []>, - Sched<[WriteBr]> { + Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass, PreV8]> { bits<4> func; let Inst{31-27} = 0b11110; let Inst{26} = 0; @@ -3656,7 +3672,8 @@ let isBranch = 1, isTerminator = 1 in { // operands, create 3 versions of the same instruction. Once there's a clean // framework to represent optional operands, change this behavior. class t2CPS<dag iops, string asm_op> : T2XI<(outs), iops, NoItinerary, - !strconcat("cps", asm_op), []> { + !strconcat("cps", asm_op), []>, + Requires<[IsThumb2, IsNotMClass]> { bits<2> imod; bits<3> iflags; bits<5> mode; @@ -3702,7 +3719,8 @@ def : t2InstAlias<"sevl$p.w", (t2HINT 5, pred:$p)> { let Predicates = [IsThumb2, HasV8]; } -def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> { +def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", + [(int_arm_dbg imm0_15:$opt)]> { bits<4> opt; let Inst{31-20} = 0b111100111010; let Inst{19-16} = 0b1111; @@ -3739,7 +3757,8 @@ def t2DCPS3 : T2DCPS<0b11, "dcps3">; class T2SRS<bits<2> Op, bit W, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { + : T2I<oops, iops, itin, opc, asm, pattern>, + Requires<[IsThumb2,IsNotMClass]> { bits<5> mode; let Inst{31-25} = 0b1110100; let Inst{24-23} = Op; @@ -3770,7 +3789,8 @@ def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>; // Return From Exception is a system instruction. class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern> - : T2I<oops, iops, itin, opc, asm, pattern> { + : T2I<oops, iops, itin, opc, asm, pattern>, + Requires<[IsThumb2,IsNotMClass]> { let Inst{31-20} = op31_20{11-0}; bits<4> Rn; @@ -3797,13 +3817,34 @@ let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary, "subs", "\tpc, lr, $imm", [(ARMintretflag imm0_255:$imm)]>, - Requires<[IsThumb2]> { + Requires<[IsThumb2,IsNotMClass]> { let Inst{31-8} = 0b111100111101111010001111; bits<8> imm; let Inst{7-0} = imm; } +// Hypervisor Call is a system instruction. 
+let isCall = 1 in { +def t2HVC : T2XI <(outs), (ins imm0_65535:$imm16), IIC_Br, "hvc.w\t$imm16", []>, + Requires<[IsThumb2, HasVirtualization]>, Sched<[WriteBr]> { + bits<16> imm16; + let Inst{31-20} = 0b111101111110; + let Inst{19-16} = imm16{15-12}; + let Inst{15-12} = 0b1000; + let Inst{11-0} = imm16{11-0}; +} +} + +// Alias for HVC without the ".w" optional width specifier +def : t2InstAlias<"hvc\t$imm16", (t2HVC imm0_65535:$imm16)>; + +// ERET - Return from exception in Hypervisor mode. +// B9.3.3, B9.3.20: ERET is an alias for "SUBS PC, LR, #0" in an implementation that +// includes virtualization extensions. +def t2ERET : InstAlias<"eret${p}", (t2SUBS_PC_LR 0, pred:$p)>, + Requires<[IsThumb2, HasVirtualization]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns // @@ -3941,10 +3982,10 @@ defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc">; defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl">; defm t2STC : t2LdStCop<0b1110, 0, 0, "stc">; defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl">; -defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8]>; -defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8]>; -defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8]>; -defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8]>; +defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2">, Requires<[PreV8,IsThumb2]>; +defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l">, Requires<[PreV8,IsThumb2]>; +defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2">, Requires<[PreV8,IsThumb2]>; +defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l">, Requires<[PreV8,IsThumb2]>; //===----------------------------------------------------------------------===// @@ -3960,7 +4001,7 @@ def t2MRS_AR : T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, apsr", bits<4> Rd; let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; - let Inst{7-0} = 0b0000; + let Inst{7-0} = 0b00000000; } def : t2InstAlias<"mrs${p} $Rd, cpsr", (t2MRS_AR GPR:$Rd, pred:$p)>; @@ -3970,22 +4011,41 @@ def t2MRSsys_AR: T2I<(outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, spsr", bits<4> Rd; let Inst{31-12} = 0b11110011111111111000; let Inst{11-8} = Rd; - let Inst{7-0} = 0b0000; + let Inst{7-0} = 0b00000000; +} + +def t2MRSbanked : T2I<(outs rGPR:$Rd), (ins banked_reg:$banked), + NoItinerary, "mrs", "\t$Rd, $banked", []>, + Requires<[IsThumb, HasVirtualization]> { + bits<6> banked; + bits<4> Rd; + + let Inst{31-21} = 0b11110011111; + let Inst{20} = banked{5}; // R bit + let Inst{19-16} = banked{3-0}; + let Inst{15-12} = 0b1000; + let Inst{11-8} = Rd; + let Inst{7-5} = 0b001; + let Inst{4} = banked{4}; + let Inst{3-0} = 0b0000; } + // M class MRS. // // This MRS has a mask field in bits 7-0 and can take more values than // the A/R class (a full msr_mask). -def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$mask), NoItinerary, - "mrs", "\t$Rd, $mask", []>, +def t2MRS_M : T2I<(outs rGPR:$Rd), (ins msr_mask:$SYSm), NoItinerary, + "mrs", "\t$Rd, $SYSm", []>, Requires<[IsThumb,IsMClass]> { bits<4> Rd; - bits<8> mask; + bits<8> SYSm; let Inst{31-12} = 0b11110011111011111000; let Inst{11-8} = Rd; - let Inst{19-16} = 0b1111; - let Inst{7-0} = mask; + let Inst{7-0} = SYSm; + + let Unpredictable{20-16} = 0b11111; + let Unpredictable{13} = 0b1; } @@ -4010,6 +4070,25 @@ def t2MSR_AR : T2I<(outs), (ins msr_mask:$mask, rGPR:$Rn), let Inst{7-0} = 0; } +// However, the MSR (banked register) system instruction (ARMv7VE) *does* have a +// separate encoding (distinguished by bit 5).
+def t2MSRbanked : T2I<(outs), (ins banked_reg:$banked, rGPR:$Rn), + NoItinerary, "msr", "\t$banked, $Rn", []>, + Requires<[IsThumb, HasVirtualization]> { + bits<6> banked; + bits<4> Rn; + + let Inst{31-21} = 0b11110011100; + let Inst{20} = banked{5}; // R bit + let Inst{19-16} = Rn; + let Inst{15-12} = 0b1000; + let Inst{11-8} = banked{3-0}; + let Inst{7-5} = 0b001; + let Inst{4} = banked{4}; + let Inst{3-0} = 0b0000; +} + + // M class MSR. // // Move from ARM core register to Special Register @@ -4022,7 +4101,13 @@ def t2MSR_M : T2I<(outs), (ins msr_mask:$SYSm, rGPR:$Rn), let Inst{20} = 0b0; let Inst{19-16} = Rn; let Inst{15-12} = 0b1000; - let Inst{11-0} = SYSm; + let Inst{11-10} = SYSm{11-10}; + let Inst{9-8} = 0b00; + let Inst{7-0} = SYSm{7-0}; + + let Unpredictable{20} = 0b1; + let Unpredictable{13} = 0b1; + let Unpredictable{9-8} = 0b11; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 55a6efcb4c04..e0a931499164 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -194,7 +194,7 @@ multiclass vfp_ldst_mult<string asm, bit L_bit, } } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { let mayLoad = 1, hasExtraDefRegAllocReq = 1 in defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>; @@ -202,7 +202,7 @@ defm VLDM : vfp_ldst_mult<"vldm", 1, IIC_fpLoad_m, IIC_fpLoad_mu>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpStore_m, IIC_fpStore_mu>; -} // neverHasSideEffects +} // hasSideEffects def : MnemonicAlias<"vldm", "vldmia">; def : MnemonicAlias<"vstm", "vstmia">; @@ -515,6 +515,8 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0, let Inst{5} = Sm{0}; let Inst{15-12} = Dd{3-0}; let Inst{22} = Dd{4}; + + let Predicates = [HasVFP2, HasDPVFP]; } // Special case encoding: bits 11-8 is 0b1011. 
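The A32 and T2 banked-register moves scatter the 6-bit banked operand the same way: bit 5 supplies the R bit, bit 4 lands in a separate instruction bit, and bits 3-0 form the low nibble of SYSm. A sketch of the t2MSRbanked packing, following the let Inst{...} assignments above (the helper itself is hypothetical):

#include <cstdint>

static uint32_t encodeT2MSRbanked(unsigned Banked /*6 bits*/, unsigned Rn) {
  uint32_t Inst = 0;
  Inst |= 0x79Cu << 21;               // Inst{31-21} = 0b11110011100
  Inst |= ((Banked >> 5) & 1u) << 20; // R bit
  Inst |= (Rn & 0xFu) << 16;          // Inst{19-16} = Rn
  Inst |= 0x8u << 12;                 // Inst{15-12} = 0b1000
  Inst |= (Banked & 0xFu) << 8;       // Inst{11-8} = banked{3-0}
  Inst |= 0x1u << 5;                  // Inst{7-5} = 0b001
  Inst |= ((Banked >> 4) & 1u) << 4;  // Inst{4} = banked{4}
  return Inst;                        // Inst{3-0} = 0b0000
}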
@@ -625,27 +627,30 @@ def : Pat<(f16_to_fp GPR:$a), def : Pat<(f64 (f16_to_fp GPR:$a)), (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>; - -multiclass vcvt_inst<string opc, bits<2> rm> { +multiclass vcvt_inst<string opc, bits<2> rm, + SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { def SS : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".s32.f32\t$Sd, $Sm"), - []>, Requires<[HasFPARMv8]> { + [(set SPR:$Sd, (arm_ftosi (node SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def US : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vcvt", opc, ".u32.f32\t$Sd, $Sm"), - []>, Requires<[HasFPARMv8]> { + [(set SPR:$Sd, (arm_ftoui (node SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def SD : ASuInp<0b11101, 0b11, 0b1100, 0b11, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".s32.f64\t$Sd, $Dm"), - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set SPR:$Sd, (arm_ftosi (f64 (node (f64 DPR:$Dm)))))]>, + Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; let Inst{17-16} = rm; @@ -659,7 +664,8 @@ multiclass vcvt_inst<string opc, bits<2> rm> { def UD : ASuInp<0b11101, 0b11, 0b1100, 0b01, 0, (outs SPR:$Sd), (ins DPR:$Dm), NoItinerary, !strconcat("vcvt", opc, ".u32.f64\t$Sd, $Dm"), - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set SPR:$Sd, (arm_ftoui (f64 (node (f64 DPR:$Dm)))))]>, + Requires<[HasFPARMv8, HasDPVFP]> { bits<5> Dm; let Inst{17-16} = rm; @@ -672,10 +678,10 @@ multiclass vcvt_inst<string opc, bits<2> rm> { } } -defm VCVTA : vcvt_inst<"a", 0b00>; +defm VCVTA : vcvt_inst<"a", 0b00, frnd>; defm VCVTN : vcvt_inst<"n", 0b01>; -defm VCVTP : vcvt_inst<"p", 0b10>; -defm VCVTM : vcvt_inst<"m", 0b11>; +defm VCVTP : vcvt_inst<"p", 0b10, fceil>; +defm VCVTM : vcvt_inst<"m", 0b11, ffloor>; def VNEGD : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -691,18 +697,20 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, let D = VFPNeonA8Domain; } -multiclass vrint_inst_zrx<string opc, bit op, bit op2> { +multiclass vrint_inst_zrx<string opc, bit op, bit op2, SDPatternOperator node> { def S : ASuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc), ".f32\t$Sd, $Sm", - []>, Requires<[HasFPARMv8]> { + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{7} = op2; let Inst{16} = op; } def D : ADuI<0b11101, 0b11, 0b0110, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc), ".f64\t$Dd, $Dm", - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { let Inst{7} = op2; let Inst{16} = op; } @@ -715,22 +723,25 @@ multiclass vrint_inst_zrx<string opc, bit op, bit op2> { Requires<[HasFPARMv8,HasDPVFP]>; } -defm VRINTZ : vrint_inst_zrx<"z", 0, 1>; -defm VRINTR : vrint_inst_zrx<"r", 0, 0>; -defm VRINTX : vrint_inst_zrx<"x", 1, 0>; +defm VRINTZ : vrint_inst_zrx<"z", 0, 1, ftrunc>; +defm VRINTR : vrint_inst_zrx<"r", 0, 0, fnearbyint>; +defm VRINTX : vrint_inst_zrx<"x", 1, 0, frint>; -multiclass vrint_inst_anpm<string opc, bits<2> rm> { +multiclass vrint_inst_anpm<string opc, bits<2> rm, + SDPatternOperator node = null_frag> { let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in { def S : ASuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), NoItinerary, !strconcat("vrint", opc, ".f32\t$Sd, $Sm"), - []>, 
Requires<[HasFPARMv8]> { + [(set (f32 SPR:$Sd), (node (f32 SPR:$Sm)))]>, + Requires<[HasFPARMv8]> { let Inst{17-16} = rm; } def D : ADuInp<0b11101, 0b11, 0b1000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), NoItinerary, !strconcat("vrint", opc, ".f64\t$Dd, $Dm"), - []>, Requires<[HasFPARMv8, HasDPVFP]> { + [(set (f64 DPR:$Dd), (node (f64 DPR:$Dm)))]>, + Requires<[HasFPARMv8, HasDPVFP]> { let Inst{17-16} = rm; } } @@ -743,10 +754,10 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm> { Requires<[HasFPARMv8,HasDPVFP]>; } -defm VRINTA : vrint_inst_anpm<"a", 0b00>; +defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>; defm VRINTN : vrint_inst_anpm<"n", 0b01>; -defm VRINTP : vrint_inst_anpm<"p", 0b10>; -defm VRINTM : vrint_inst_anpm<"m", 0b11>; +defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>; +defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>; def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, (outs DPR:$Dd), (ins DPR:$Dm), @@ -758,7 +769,7 @@ def VSQRTS : ASuI<0b11101, 0b11, 0b0001, 0b11, 0, IIC_fpSQRT32, "vsqrt", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fsqrt SPR:$Sm))]>; -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs DPR:$Dd), (ins DPR:$Dm), IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", []>; @@ -766,7 +777,7 @@ def VMOVD : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, def VMOVS : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", []>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // FP <-> GPR Copies. Int <-> FP Conversions. @@ -816,7 +827,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, let D = VFPNeonDomain; } -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$Rt, GPR:$Rt2), (ins DPR:$Dm), IIC_fpMOVDI, "vmov", "\t$Rt, $Rt2, $Dm", @@ -837,6 +848,11 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + // This instruction is equivalent to + // $Rt = EXTRACT_SUBREG $Dm, ssub_0 + // $Rt2 = EXTRACT_SUBREG $Dm, ssub_1 + let isExtractSubreg = 1; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, @@ -860,7 +876,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, let D = VFPNeonDomain; let DecoderMethod = "DecodeVMOVRRS"; } -} // neverHasSideEffects +} // hasSideEffects // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR @@ -885,9 +901,13 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. let D = VFPNeonDomain; + + // This instruction is equivalent to + // $Dm = REG_SEQUENCE $Rt, ssub_0, $Rt2, ssub_1 + let isRegSequence = 1; } -let neverHasSideEffects = 1 in +let hasSideEffects = 0 in def VMOVSRR : AVConv5I<0b11000100, 0b1010, (outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2), IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", @@ -1523,7 +1543,7 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), // FP Conditional moves. 
// -let neverHasSideEffects = 1 in { +let hasSideEffects = 0 in { def VMOVDcc : PseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, cmovpred:$p), IIC_fpUNA64, [(set (f64 DPR:$Dd), @@ -1535,7 +1555,7 @@ def VMOVScc : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p), [(set (f32 SPR:$Sd), (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>, RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>; -} // neverHasSideEffects +} // hasSideEffects //===----------------------------------------------------------------------===// // Move from VFP System Register to ARM core register. diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp deleted file mode 100644 index 6d1114d51aab..000000000000 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ /dev/null @@ -1,344 +0,0 @@ -//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the JIT interfaces for the ARM target. -// -//===----------------------------------------------------------------------===// - -#include "ARMJITInfo.h" -#include "ARMConstantPoolValue.h" -#include "ARMMachineFunctionInfo.h" -#include "ARMRelocations.h" -#include "MCTargetDesc/ARMBaseInfo.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/IR/Function.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Memory.h" -#include "llvm/Support/raw_ostream.h" -#include <cstdlib> -using namespace llvm; - -#define DEBUG_TYPE "jit" - -void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction"); -} - -/// JITCompilerFunction - This contains the address of the JIT function used to -/// compile a function lazily. -static TargetJITInfo::JITCompilerFn JITCompilerFunction; - -// Get the ASMPREFIX for the current host. This is often '_'. -#ifndef __USER_LABEL_PREFIX__ -#define __USER_LABEL_PREFIX__ -#endif -#define GETASMPREFIX2(X) #X -#define GETASMPREFIX(X) GETASMPREFIX2(X) -#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__) - -// CompilationCallback stub - We can't use a C function with inline assembly in -// it, because the prolog/epilog inserted by GCC won't work for us. (We need -// to preserve more context and manipulate the stack directly). Instead, -// write our own wrapper, which does things our way, so we have complete -// control over register saving and restoring. -extern "C" { -#if defined(__arm__) - void ARMCompilationCallback(); - asm( - ".text\n" - ".align 2\n" - ".globl " ASMPREFIX "ARMCompilationCallback\n" - ASMPREFIX "ARMCompilationCallback:\n" - // Save caller saved registers since they may contain stuff - // for the real target function right now. We have to act as if this - // whole compilation callback doesn't exist as far as the caller is - // concerned, so we can't just preserve the callee saved regs. - "stmdb sp!, {r0, r1, r2, r3, lr}\n" -#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - "vstmdb sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" -#endif - // The LR contains the address of the stub function on entry. 
- // pass it as the argument to the C part of the callback - "mov r0, lr\n" - "sub sp, sp, #4\n" - // Call the C portion of the callback - "bl " ASMPREFIX "ARMCompilationCallbackC\n" - "add sp, sp, #4\n" - // Restoring the LR to the return address of the function that invoked - // the stub and de-allocating the stack space for it requires us to - // swap the two saved LR values on the stack, as they're backwards - // for what we need since the pop instruction has a pre-determined - // order for the registers. - // +--------+ - // 0 | LR | Original return address - // +--------+ - // 1 | LR | Stub address (start of stub) - // 2-5 | R3..R0 | Saved registers (we need to preserve all regs) - // 6-20 | D0..D7 | Saved VFP registers - // +--------+ - // -#if (defined(__VFP_FP__) && !defined(__SOFTFP__)) - // Restore VFP caller-saved registers. - "vldmia sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n" -#endif - // - // We need to exchange the values in slots 0 and 1 so we can - // return to the address in slot 1 with the address in slot 0 - // restored to the LR. - "ldr r0, [sp,#20]\n" - "ldr r1, [sp,#16]\n" - "str r1, [sp,#20]\n" - "str r0, [sp,#16]\n" - // Return to the (newly modified) stub to invoke the real function. - // The above twiddling of the saved return addresses allows us to - // deallocate everything, including the LR the stub saved, with two - // updating load instructions. - "ldmia sp!, {r0, r1, r2, r3, lr}\n" - "ldr pc, [sp], #4\n" - ); -#else // Not an ARM host - void ARMCompilationCallback() { - llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!"); - } -#endif -} - -/// ARMCompilationCallbackC - This is the target-specific function invoked -/// by the function stub when we did not know the real target of a call. -/// This function must locate the start of the stub or call site and pass -/// it into the JIT compiler function. -extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) { - // Get the address of the compiled code for this function. - intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr); - - // Rewrite the call target... so that we don't end up here every time we - // execute the call. We're replacing the first two instructions of the - // stub with: - // ldr pc, [pc,#-4] - // <addr> - if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - *(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4] - *(intptr_t *)(StubAddr+4) = NewVal; - if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } -} - -TargetJITInfo::LazyResolverFn -ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { - JITCompilerFunction = F; - return ARMCompilationCallback; -} - -void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, - JITCodeEmitter &JCE) { - uint8_t Buffer[4]; - uint8_t *Cur = Buffer; - MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr); - void *PtrAddr = JCE.allocIndirectGV( - GV, Buffer, sizeof(Buffer), /*Alignment=*/4); - addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); - return PtrAddr; -} - -TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() { - // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a - // 4-byte address. See emitFunctionStub for details. 
- StubLayout Result = {16, 4}; - return Result; -} - -void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) { - void *Addr; - // If this is just a call to an external function, emit a branch instead of a - // call. The code is the same except for one bit of the last instruction. - if (Fn != (void*)(intptr_t)ARMCompilationCallback) { - // Branch to the corresponding function addr. - if (IsPIC) { - // The stub is 16-byte size and 4-aligned. - intptr_t LazyPtr = getIndirectSymAddr(Fn); - if (!LazyPtr) { - // In PIC mode, the function stub is loading a lazy-ptr. - LazyPtr= (intptr_t)emitGlobalValueIndirectSym((const GlobalValue*)F, Fn, JCE); - DEBUG(if (F) - errs() << "JIT: Indirect symbol emitted at [" << LazyPtr - << "] for GV '" << F->getName() << "'\n"; - else - errs() << "JIT: Stub emitted at [" << LazyPtr - << "] for external function at '" << Fn << "'\n"); - } - JCE.emitAlignment(4); - Addr = (void*)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4] - JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip - JCE.emitWordLE(0xe59cf000); // ldr pc, [ip] - JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8) - sys::Memory::InvalidateInstructionCache(Addr, 16); - if (!sys::Memory::setRangeExecutable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } - } else { - // The stub is 8-byte size and 4-aligned. - JCE.emitAlignment(4); - Addr = (void*)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable(Addr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] - JCE.emitWordLE((intptr_t)Fn); // addr of function - sys::Memory::InvalidateInstructionCache(Addr, 8); - if (!sys::Memory::setRangeExecutable(Addr, 8)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } - } - } else { - // The compilation callback will overwrite the first two words of this - // stub with indirect branch instructions targeting the compiled code. - // This stub sets the return address to restart the stub, so that - // the new branch will be invoked when we come back. - // - // Branch and link to the compilation callback. - // The stub is 16-byte size and 4-byte aligned. - JCE.emitAlignment(4); - Addr = (void*)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub writable"); - } - // Save LR so the callback can determine which stub called it. - // The compilation callback is responsible for popping this prior - // to returning. - JCE.emitWordLE(0xe92d4000); // push {lr} - // Set the return address to go back to the start of this stub. - JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12 - // Invoke the compilation callback. - JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] - // The address of the compilation callback. - JCE.emitWordLE((intptr_t)ARMCompilationCallback); - sys::Memory::InvalidateInstructionCache(Addr, 16); - if (!sys::Memory::setRangeExecutable(Addr, 16)) { - llvm_unreachable("ERROR: Unable to mark stub executable"); - } - } - - return Addr; -} - -intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const { - ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType(); - switch (RT) { - default: - return (intptr_t)(MR->getResultPointer()); - case ARM::reloc_arm_pic_jt: - // Destination address - jump table base. 
- return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal(); - case ARM::reloc_arm_jt_base: - // Jump table base address. - return getJumpTableBaseAddr(MR->getJumpTableIndex()); - case ARM::reloc_arm_cp_entry: - case ARM::reloc_arm_vfp_cp_entry: - // Constant pool entry address. - return getConstantPoolEntryAddr(MR->getConstantPoolIndex()); - case ARM::reloc_arm_machine_cp_entry: { - ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal(); - assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) && - "Can't handle this machine constant pool entry yet!"); - intptr_t Addr = (intptr_t)(MR->getResultPointer()); - Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment(); - return Addr; - } - } -} - -/// relocate - Before the JIT can run a block of code that has been emitted, -/// it must rewrite the code to contain the actual addresses of any -/// referenced global symbols. -void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) { - for (unsigned i = 0; i != NumRelocs; ++i, ++MR) { - void *RelocPos = (char*)Function + MR->getMachineCodeOffset(); - intptr_t ResultPtr = resolveRelocDestAddr(MR); - switch ((ARM::RelocationType)MR->getRelocationType()) { - case ARM::reloc_arm_cp_entry: - case ARM::reloc_arm_vfp_cp_entry: - case ARM::reloc_arm_relative: { - // It is necessary to calculate the correct PC relative value. We - // subtract the base addr from the target addr to form a byte offset. - ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; - // If the result is positive, set bit U(23) to 1. - if (ResultPtr >= 0) - *((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift; - else { - // Otherwise, obtain the absolute value and set bit U(23) to 0. - *((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift); - ResultPtr = - ResultPtr; - } - // Set the immed value calculated. - // VFP immediate offset is multiplied by 4. - if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry) - ResultPtr = ResultPtr >> 2; - *((intptr_t*)RelocPos) |= ResultPtr; - // Set register Rn to PC (which is register 15 on all architectures). - // FIXME: This avoids the need for register info in the JIT class. - *((intptr_t*)RelocPos) |= 15 << ARMII::RegRnShift; - break; - } - case ARM::reloc_arm_pic_jt: - case ARM::reloc_arm_machine_cp_entry: - case ARM::reloc_arm_absolute: { - // These addresses have already been resolved. - *((intptr_t*)RelocPos) |= (intptr_t)ResultPtr; - break; - } - case ARM::reloc_arm_branch: { - // It is necessary to calculate the correct value of signed_immed_24 - // field. We subtract the base addr from the target addr to form a - // byte offset, which must be inside the range -33554432 and +33554428. - // Then, we set the signed_immed_24 field of the instruction to bits - // [25:2] of the byte offset. More details ARM-ARM p. A4-11. 
- ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; - ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2; - assert(ResultPtr >= -33554432 && ResultPtr <= 33554428); - *((intptr_t*)RelocPos) |= ResultPtr; - break; - } - case ARM::reloc_arm_jt_base: { - // JT base - (instruction addr + 8) - ResultPtr = ResultPtr - (intptr_t)RelocPos - 8; - *((intptr_t*)RelocPos) |= ResultPtr; - break; - } - case ARM::reloc_arm_movw: { - ResultPtr = ResultPtr & 0xFFFF; - *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; - *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; - break; - } - case ARM::reloc_arm_movt: { - ResultPtr = (ResultPtr >> 16) & 0xFFFF; - *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF; - *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; - break; - } - } - } -} - -void ARMJITInfo::Initialize(const MachineFunction &MF, bool isPIC) { - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); - JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); - IsPIC = isPIC; -} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h deleted file mode 100644 index 27e2a2013404..000000000000 --- a/lib/Target/ARM/ARMJITInfo.h +++ /dev/null @@ -1,177 +0,0 @@ -//===-- ARMJITInfo.h - ARM implementation of the JIT interface -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the ARMJITInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ARMJITINFO_H -#define ARMJITINFO_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetJITInfo.h" - -namespace llvm { - class ARMTargetMachine; - - class ARMJITInfo : public TargetJITInfo { - // ConstPoolId2AddrMap - A map from constant pool ids to the corresponding - // CONSTPOOL_ENTRY addresses. - SmallVector<intptr_t, 16> ConstPoolId2AddrMap; - - // JumpTableId2AddrMap - A map from inline jumptable ids to the - // corresponding inline jump table bases. - SmallVector<intptr_t, 16> JumpTableId2AddrMap; - - // PCLabelMap - A map from PC labels to addresses. - DenseMap<unsigned, intptr_t> PCLabelMap; - - // Sym2IndirectSymMap - A map from symbol (GlobalValue and ExternalSymbol) - // addresses to their indirect symbol addresses. - DenseMap<void*, intptr_t> Sym2IndirectSymMap; - - // IsPIC - True if the relocation model is PIC. This is used to determine - // how to codegen function stubs. - bool IsPIC; - - public: - explicit ARMJITInfo() : IsPIC(false) { useGOT = false; } - - /// replaceMachineCodeForFunction - Make it so that calling the function - /// whose machine code is at OLD turns into a call to NEW, perhaps by - /// overwriting OLD with a branch to NEW. This is used for self-modifying - /// code. - /// - void replaceMachineCodeForFunction(void *Old, void *New) override; - - /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object - /// to emit an indirect symbol which contains the address of the specified - /// ptr. 
- void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, - JITCodeEmitter &JCE) override; - - // getStubLayout - Returns the size and alignment of the largest call stub - // on ARM. - StubLayout getStubLayout() override; - - /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a - /// small native function that simply calls the function at the specified - /// address. - void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) override; - - /// getLazyResolverFunction - Expose the lazy resolver to the JIT. - LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; - - /// relocate - Before the JIT can run a block of code that has been emitted, - /// it must rewrite the code to contain the actual addresses of any - /// referenced global symbols. - void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) override; - - /// hasCustomConstantPool - Allows a target to specify that constant - /// pool address resolution is handled by the target. - bool hasCustomConstantPool() const override { return true; } - - /// hasCustomJumpTables - Allows a target to specify that jumptables - /// are emitted by the target. - bool hasCustomJumpTables() const override { return true; } - - /// allocateSeparateGVMemory - If true, globals should be placed in - /// separately allocated heap memory rather than in the same - /// code memory allocated by JITCodeEmitter. - bool allocateSeparateGVMemory() const override { -#ifdef __APPLE__ - return true; -#else - return false; -#endif - } - - /// Initialize - Initialize internal stage for the function being JITted. - /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize - /// jump table ids to jump table bases map; remember if codegen relocation - /// model is PIC. - void Initialize(const MachineFunction &MF, bool isPIC); - - /// getConstantPoolEntryAddr - The ARM target puts all constant - /// pool entries into constant islands. This returns the address of the - /// constant pool entry of the specified index. - intptr_t getConstantPoolEntryAddr(unsigned CPI) const { - assert(CPI < ConstPoolId2AddrMap.size()); - return ConstPoolId2AddrMap[CPI]; - } - - /// addConstantPoolEntryAddr - Map a Constant Pool Index to the address - /// where its associated value is stored. When relocations are processed, - /// this value will be used to resolve references to the constant. - void addConstantPoolEntryAddr(unsigned CPI, intptr_t Addr) { - assert(CPI < ConstPoolId2AddrMap.size()); - ConstPoolId2AddrMap[CPI] = Addr; - } - - /// getJumpTableBaseAddr - The ARM target inline all jump tables within - /// text section of the function. This returns the address of the base of - /// the jump table of the specified index. - intptr_t getJumpTableBaseAddr(unsigned JTI) const { - assert(JTI < JumpTableId2AddrMap.size()); - return JumpTableId2AddrMap[JTI]; - } - - /// addJumpTableBaseAddr - Map a jump table index to the address where - /// the corresponding inline jump table is emitted. When relocations are - /// processed, this value will be used to resolve references to the - /// jump table. - void addJumpTableBaseAddr(unsigned JTI, intptr_t Addr) { - assert(JTI < JumpTableId2AddrMap.size()); - JumpTableId2AddrMap[JTI] = Addr; - } - - /// getPCLabelAddr - Retrieve the address of the PC label of the - /// specified id. 
- intptr_t getPCLabelAddr(unsigned Id) const { - DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id); - assert(I != PCLabelMap.end()); - return I->second; - } - - /// addPCLabelAddr - Remember the address of the specified PC label. - void addPCLabelAddr(unsigned Id, intptr_t Addr) { - PCLabelMap.insert(std::make_pair(Id, Addr)); - } - - /// getIndirectSymAddr - Retrieve the address of the indirect symbol of the - /// specified symbol located at address. Returns 0 if the indirect symbol - /// has not been emitted. - intptr_t getIndirectSymAddr(void *Addr) const { - DenseMap<void*,intptr_t>::const_iterator I= Sym2IndirectSymMap.find(Addr); - if (I != Sym2IndirectSymMap.end()) - return I->second; - return 0; - } - - /// addIndirectSymAddr - Add a mapping from address of an emitted symbol to - /// its indirect symbol address. - void addIndirectSymAddr(void *SymAddr, intptr_t IndSymAddr) { - Sym2IndirectSymMap.insert(std::make_pair(SymAddr, IndSymAddr)); - } - - private: - /// resolveRelocDestAddr - Resolve the resulting address of the relocation - /// if it's not already solved. Constantpool entries must be resolved by - /// ARM target. - intptr_t resolveRelocDestAddr(MachineRelocation *MR) const; - }; -} - -#endif diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a03bcdbddd79..c429ac185211 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -144,6 +144,46 @@ namespace { char ARMLoadStoreOpt::ID = 0; } +static bool definesCPSR(const MachineInstr *MI) { + for (const auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) + // If the instruction has live CPSR def, then it's not safe to fold it + // into load / store. + return true; + } + + return false; +} + +static int getMemoryOpOffset(const MachineInstr *MI) { + int Opcode = MI->getOpcode(); + bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; + unsigned NumOperands = MI->getDesc().getNumOperands(); + unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + + if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || + Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || + Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || + Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) + return OffField; + + // Thumb1 immediate offsets are scaled by 4 + if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) + return OffField * 4; + + int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4; + ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField) + : ARM_AM::getAM5Op(OffField); + + if (Op == ARM_AM::sub) + return -Offset; + + return Offset; +} + static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Opcode) { default: llvm_unreachable("Unhandled opcode!"); @@ -335,40 +375,50 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, unsigned WordOffset, ARMCC::CondCodes Pred, unsigned PredReg) { assert(isThumb1 && "Can only update base register uses for Thumb1!"); - - // Start updating any instructions with immediate offsets. Insert a sub before + // Start updating any instructions with immediate offsets. Insert a SUB before // the first non-updateable instruction (if any). 
for (; MBBI != MBB.end(); ++MBBI) { + bool InsertSub = false; + unsigned Opc = MBBI->getOpcode(); + if (MBBI->readsRegister(Base)) { - unsigned Opc = MBBI->getOpcode(); int Offset; - bool InsertSub = false; + bool IsLoad = + Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi; + bool IsStore = + Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi; - if (Opc == ARM::tLDRi || Opc == ARM::tSTRi || - Opc == ARM::tLDRHi || Opc == ARM::tSTRHi || - Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) { + if (IsLoad || IsStore) { // Loads and stores with immediate offsets can be updated, but only if // the new offset isn't negative. // The MachineOperand containing the offset immediate is the last one // before predicates. MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); - // The offsets are scaled by 1, 2 or 4 depending on the Opcode + // The offsets are scaled by 1, 2 or 4 depending on the Opcode. Offset = MO.getImm() - WordOffset * getImmScale(Opc); - if (Offset >= 0) + + // If storing the base register, it needs to be reset first. + unsigned InstrSrcReg = MBBI->getOperand(0).getReg(); + + if (Offset >= 0 && !(IsStore && InstrSrcReg == Base)) MO.setImm(Offset); else InsertSub = true; - } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) { - // SUB/ADD using this register. Merge it with the update. - // If the merged offset is too large, insert a new sub instead. + } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && + !definesCPSR(MBBI)) { + // SUBS/ADDS using this register, with a dead def of the CPSR. + // Merge it with the update; if the merged offset is too large, + // insert a new sub instead. MachineOperand &MO = MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3); Offset = (Opc == ARM::tSUBi8) ? MO.getImm() + WordOffset * 4 : MO.getImm() - WordOffset * 4 ; - if (TL->isLegalAddImmediate(Offset)) { + if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) { + // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if + // Offset == 0. MO.setImm(Offset); // The base register has now been reset, so exit early. return; @@ -381,13 +431,19 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, InsertSub = true; } - if (InsertSub) { - // An instruction above couldn't be updated, so insert a sub. - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) - .addImm(Pred).addReg(PredReg); - return; - } + } else if (definesCPSR(MBBI) || MBBI->isCall() || MBBI->isBranch()) { + // Since SUBS sets the condition flags, we can't place the base reset + // after an instruction that has a live CPSR def. + // The base register might also contain an argument for a function call. + InsertSub = true; + } + + if (InsertSub) { + // An instruction above couldn't be updated, so insert a sub. + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) + .addImm(Pred).addReg(PredReg); + return; } if (MBBI->killsRegister(Base)) @@ -395,15 +451,18 @@ ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, return; } - // The end of the block was reached. This means register liveness escapes the - // block, and it's necessary to insert a sub before the last instruction. - if (MBB.succ_size() > 0) - // But only insert the SUB if there is actually a successor block. - // FIXME: Check more carefully if register is live at this point, e.g. by - // also examining the successor block's register liveness information. 
- AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base)) - .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4) + // End of block was reached. + if (MBB.succ_size() > 0) { + // FIXME: Because of a bug, live registers are sometimes missing from + // the successor blocks' live-in sets. This means we can't trust that + // information and *always* have to reset at the end of a block. + // See PR21029. + if (MBBI != MBB.end()) --MBBI; + AddDefaultT1CC( + BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base), true) + .addReg(Base, getKillRegState(false)).addImm(WordOffset * 4) .addImm(Pred).addReg(PredReg); + } } /// MergeOps - Create and insert a LDM or STM with Base as base register and @@ -422,6 +481,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NumRegs <= 1) return false; + // For Thumb1 targets, it might be necessary to clobber the CPSR to merge. + // Compute liveness information for that register to make the decision. + bool SafeToClobberCPSR = !isThumb1 || + (MBB.computeRegisterLiveness(TRI, ARM::CPSR, std::prev(MBBI), 15) == + MachineBasicBlock::LQR_Dead); + + bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. + + // Exception: If the base register is in the input reglist, Thumb1 LDM is + // non-writeback. + // It's also not possible to merge an STR of the base register in Thumb1. + if (isThumb1) + for (unsigned I = 0; I < NumRegs; ++I) + if (Base == Regs[I].first) { + if (Opcode == ARM::tLDRi) { + Writeback = false; + break; + } else if (Opcode == ARM::tSTRi) { + return false; + } + } + ARM_AM::AMSubMode Mode = ARM_AM::ia; // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode); @@ -445,6 +526,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NumRegs <= 2) return false; + // On Thumb1, it's not worth materializing a new base register without + // clobbering the CPSR (i.e. not using ADDS/SUBS). + if (!SafeToClobberCPSR) + return false; + unsigned NewBase; if (isi32Load(Opcode)) { // If it is a load, then just use one of the destination register to @@ -459,13 +545,15 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, int BaseOpc = isThumb2 ? ARM::t2ADDri : + (isThumb1 && Offset < 8) ? ARM::tADDi3 : isThumb1 ? ARM::tADDi8 : ARM::ADDri; if (Offset < 0) { + Offset = - Offset; BaseOpc = isThumb2 ? ARM::t2SUBri : + (isThumb1 && Offset < 8) ? ARM::tSUBi3 : isThumb1 ? ARM::tSUBi8 : ARM::SUBri; - Offset = - Offset; } if (!TL->isLegalAddImmediate(Offset)) @@ -473,22 +561,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, return false; // Probably not worth it then. if (isThumb1) { - if (Base != NewBase) { + // Thumb1: depending on immediate size, use either + // ADDS NewBase, Base, #imm3 + // or + // MOV NewBase, Base + // ADDS NewBase, #imm8. + if (Base != NewBase && Offset >= 8) { // Need to insert a MOV to the new base first. - // FIXME: If the immediate fits in 3 bits, use ADD instead. BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase) .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); + // Set up BaseKill and Base correctly to insert the ADDS/SUBS below. 
+ Base = NewBase; + BaseKill = false; } - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)) - .addReg(NewBase, getKillRegState(true)).addImm(Offset) + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase), true) + .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg); } else { BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase) .addReg(Base, getKillRegState(BaseKill)).addImm(Offset) .addImm(Pred).addReg(PredReg).addReg(0); } - Base = NewBase; BaseKill = true; // New base is always killed straight away. } @@ -501,16 +595,16 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); if (!Opcode) return false; - bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback. - - // Exception: If the base register is in the input reglist, Thumb1 LDM is - // non-writeback. Check for this. - if (Opcode == ARM::tLDMIA && isThumb1) - for (unsigned I = 0; I < NumRegs; ++I) - if (Base == Regs[I].first) { - Writeback = false; - break; - } + // Check if a Thumb1 LDM/STM merge is safe. This is the case if: + // - There is no writeback (LDM of base register), + // - the base register is killed by the merged instruction, + // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS + // to reset the base register. + // Otherwise, don't merge. + // It's safe to return here since the code to materialize a new base register + // above is also conditional on SafeToClobberCPSR. + if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill) + return false; MachineInstrBuilder MIB; @@ -525,11 +619,11 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, MIB.addReg(Base, getDefRegState(true)) .addReg(Base, getKillRegState(BaseKill)); - // The base isn't dead after a merged instruction with writeback. Update - // future uses of the base with the added offset (if possible), or reset - // the base register as necessary. + // The base isn't dead after a merged instruction with writeback. + // Insert a sub instruction after the newly formed instruction to reset. if (!BaseKill) UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); + } else { // No writeback, simply build the MachineInstr. MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); @@ -700,6 +794,11 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, memOps[i].MBBI = Merges.back(); memOps[i].Position = insertPos; } + + // Update memOps offsets, since they may have been modified by MergeOps. + for (auto &MemOp : memOps) { + MemOp.Offset = getMemoryOpOffset(MemOp.MBBI); + } } /// MergeLDR_STR - Merge a number of load / store instructions into one or more @@ -721,7 +820,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); unsigned Count = 1; unsigned Limit = ~0U; - + bool BaseKill = false; // vldm / vstm limit are 32 for S variants, 16 for D variants. switch (Opcode) { @@ -760,36 +859,28 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, ++Count; } else { // Can't merge this in. Try merge the earlier ones first. - MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, - Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges); + // We need to compute BaseKill here because the MemOps may have been + // reordered. 
+ BaseKill = Loc->killsRegister(Base); + + MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset, Base, + BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, MemOps, Merges); return; } - if (MemOps[i].Position > MemOps[insertAfter].Position) + if (MemOps[i].Position > MemOps[insertAfter].Position) { insertAfter = i; + Loc = MemOps[i].MBBI; + } } - bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; + BaseKill = Loc->killsRegister(Base); MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges); } -static bool definesCPSR(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead()) - // If the instruction has live CPSR def, then it's not safe to fold it - // into load / store. - return true; - } - - return false; -} - static bool isMatchingDecrement(MachineInstr *MI, unsigned Base, unsigned Bytes, unsigned Limit, ARMCC::CondCodes Pred, unsigned PredReg) { @@ -1327,34 +1418,6 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { RS->forward(std::prev(Loc)); } -static int getMemoryOpOffset(const MachineInstr *MI) { - int Opcode = MI->getOpcode(); - bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; - unsigned NumOperands = MI->getDesc().getNumOperands(); - unsigned OffField = MI->getOperand(NumOperands-3).getImm(); - - if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 || - Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 || - Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 || - Opcode == ARM::LDRi12 || Opcode == ARM::STRi12) - return OffField; - - // Thumb1 immediate offsets are scaled by 4 - if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi) - return OffField * 4; - - int Offset = isAM3 ? 
ARM_AM::getAM3Offset(OffField) - : ARM_AM::getAM5Offset(OffField) * 4; - if (isAM3) { - if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } else { - if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } - return Offset; -} - static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, bool isDef, @@ -1725,21 +1788,15 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); - TL = TM.getTargetLowering(); + TL = TM.getSubtargetImpl()->getTargetLowering(); AFI = Fn.getInfo<ARMFunctionInfo>(); - TII = TM.getInstrInfo(); - TRI = TM.getRegisterInfo(); + TII = TM.getSubtargetImpl()->getInstrInfo(); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); STI = &TM.getSubtarget<ARMSubtarget>(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; - // FIXME: Temporarily disabling for Thumb-1 due to miscompiles - if (isThumb1) { - delete RS; - return false; - } - bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { @@ -1793,10 +1850,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - TD = Fn.getTarget().getDataLayout(); - TII = Fn.getTarget().getInstrInfo(); - TRI = Fn.getTarget().getRegisterInfo(); - STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); + TD = Fn.getSubtarget().getDataLayout(); + TII = Fn.getSubtarget().getInstrInfo(); + TRI = Fn.getSubtarget().getRegisterInfo(); + STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget()); MRI = &Fn.getRegInfo(); MF = &Fn; @@ -1811,7 +1868,7 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, MachineBasicBlock::iterator I, MachineBasicBlock::iterator E, - SmallPtrSet<MachineInstr*, 4> &MemOps, + SmallPtrSetImpl<MachineInstr*> &MemOps, SmallSet<unsigned, 4> &MemRegs, const TargetRegisterInfo *TRI) { // Are there stores / loads / calls between them? 
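Aside on the relocated getMemoryOpOffset above: AM3 immediates are plain byte offsets, AM5 immediates count 4-byte words, and the addressing-mode opcode supplies the sign, so the rewrite collapses the old duplicated if/else sign handling into a single AddrOpc query. A minimal standalone sketch of that decode logic, assuming a made-up offset-field layout (sign in bit 8, 8-bit magnitude) and hypothetical encode/decode helpers in place of the real ARM_AM accessors:

    #include <cassert>

    // Hypothetical stand-ins for ARM_AM::AddrOpc and the
    // getAM3Offset/getAM5Offset accessors; the field layout (sign in bit 8,
    // magnitude in bits 0-7) is assumed for illustration only and is not the
    // real ARM encoding.
    enum AddrOpc { Add, Sub };

    static unsigned encodeOffField(AddrOpc Op, unsigned Magnitude) {
      return (Op == Sub ? 1u << 8 : 0u) | (Magnitude & 0xFFu);
    }
    static AddrOpc getOp(unsigned OffField) {
      return (OffField & (1u << 8)) ? Sub : Add;
    }
    static unsigned getMagnitude(unsigned OffField) { return OffField & 0xFFu; }

    // Mirrors the merged logic in getMemoryOpOffset: AM3 immediates are byte
    // offsets, AM5 immediates are scaled by 4, and AddrOpc supplies the sign.
    static int memoryOpOffset(unsigned OffField, bool IsAM3) {
      int Offset = IsAM3 ? getMagnitude(OffField) : getMagnitude(OffField) * 4;
      return getOp(OffField) == Sub ? -Offset : Offset;
    }

    int main() {
      assert(memoryOpOffset(encodeOffField(Add, 12), true) == 12);   // AM3, +12
      assert(memoryOpOffset(encodeOffField(Sub, 12), true) == -12);  // AM3, -12
      assert(memoryOpOffset(encodeOffField(Sub, 3), false) == -12);  // AM5, -3 words
      return 0;
    }

Built with any C++ compiler, the asserts pass; the point is only the shared sign/scale shape, not the exact bit positions LLVM uses.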
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 023f5f8e37a0..fd4f5ff3f202 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -119,11 +119,45 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, ARMAsmPrinter &AP) { OutMI.setOpcode(MI->getOpcode()); + // In the MC layer, we keep modified immediates in their encoded form + bool EncodeImms = false; + switch (MI->getOpcode()) { + default: break; + case ARM::MOVi: + case ARM::MVNi: + case ARM::CMPri: + case ARM::CMNri: + case ARM::TSTri: + case ARM::TEQri: + case ARM::MSRi: + case ARM::ADCri: + case ARM::ADDri: + case ARM::ADDSri: + case ARM::SBCri: + case ARM::SUBri: + case ARM::SUBSri: + case ARM::ANDri: + case ARM::ORRri: + case ARM::EORri: + case ARM::BICri: + case ARM::RSBri: + case ARM::RSBSri: + case ARM::RSCri: + EncodeImms = true; + break; + } + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCOp; - if (AP.lowerOperand(MO, MCOp)) + if (AP.lowerOperand(MO, MCOp)) { + if (MCOp.isImm() && EncodeImms) { + int32_t Enc = ARM_AM::getSOImmVal(MCOp.getImm()); + if (Enc != -1) + MCOp.setImm(Enc); + } OutMI.addOperand(MCOp); + } } } diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index d3fabc3ebb04..ddfdb5240c2b 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMMACHINEFUNCTIONINFO_H -#define ARMMACHINEFUNCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H +#define LLVM_LIB_TARGET_ARM_ARMMACHINEFUNCTIONINFO_H #include "ARMSubtarget.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/DenseMap.h" namespace llvm { @@ -48,6 +48,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// unsigned ArgRegsSaveSize; + /// ReturnRegsCount - Number of registers used up in the return. + unsigned ReturnRegsCount; + /// HasStackFrame - True if this function has a stack frame. Set by /// processFunctionBeforeCalleeSavedScan(). bool HasStackFrame; @@ -83,6 +86,7 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// areas. 
unsigned GPRCS1Size; unsigned GPRCS2Size; + unsigned DPRCSAlignGapSize; unsigned DPRCSSize; /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in @@ -127,10 +131,11 @@ public: ARMFunctionInfo() : isThumb(false), hasThumb2(false), - ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), + ArgRegsSaveSize(0), ReturnRegsCount(0), HasStackFrame(false), + RestoreSPFromFP(false), LRSpilledForFarJump(false), FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0), NumAlignedDPRCS2Regs(0), JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} @@ -151,6 +156,9 @@ public: } void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; } + unsigned getReturnRegsCount() const { return ReturnRegsCount; } + void setReturnRegsCount(unsigned s) { ReturnRegsCount = s; } + bool hasStackFrame() const { return HasStackFrame; } void setHasStackFrame(bool s) { HasStackFrame = s; } @@ -176,10 +184,12 @@ public: unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } + unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } + void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } unsigned getArgumentStackSize() const { return ArgumentStackSize; } @@ -238,4 +248,4 @@ public: }; } // End llvm namespace -#endif // ARMMACHINEFUNCTIONINFO_H +#endif diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 2a4925572ae8..1c50f9e9acfa 100644 --- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -9,8 +9,8 @@ //===------------------------------------------------------------------------------------------===// #include "ARM.h" -#include "ARMMachineFunctionInfo.h" #include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; diff --git a/lib/Target/ARM/ARMPerfectShuffle.h b/lib/Target/ARM/ARMPerfectShuffle.h index efa22fbed9f7..3ff0bee7e5bf 100644 --- a/lib/Target/ARM/ARMPerfectShuffle.h +++ b/lib/Target/ARM/ARMPerfectShuffle.h @@ -12,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H +#define LLVM_LIB_TARGET_ARM_ARMPERFECTSHUFFLE_H + // 31 entries have cost 0 // 242 entries have cost 1 // 1447 entries have cost 2 @@ -6584,3 +6587,5 @@ static const unsigned PerfectShuffleTable[6561+1] = { 835584U, // <u,u,u,u>: Cost 0 copy LHS 0 }; + +#endif diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index 3e6af3f04698..b6231735c2c0 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMREGISTERINFO_H -#define ARMREGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_ARMREGISTERINFO_H #include "ARMBaseRegisterInfo.h" diff --git a/lib/Target/ARM/ARMRelocations.h 
b/lib/Target/ARM/ARMRelocations.h deleted file mode 100644 index 21877fd9af37..000000000000 --- a/lib/Target/ARM/ARMRelocations.h +++ /dev/null @@ -1,62 +0,0 @@ -//===-- ARMRelocations.h - ARM Code Relocations -----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the ARM target-specific relocation types. -// -//===----------------------------------------------------------------------===// - -#ifndef ARMRELOCATIONS_H -#define ARMRELOCATIONS_H - -#include "llvm/CodeGen/MachineRelocation.h" - -namespace llvm { - namespace ARM { - enum RelocationType { - // reloc_arm_absolute - Absolute relocation, just add the relocated value - // to the value already in memory. - reloc_arm_absolute, - - // reloc_arm_relative - PC relative relocation, add the relocated value to - // the value already in memory, after we adjust it for where the PC is. - reloc_arm_relative, - - // reloc_arm_cp_entry - PC relative relocation for constpool_entry's whose - // addresses are kept locally in a map. - reloc_arm_cp_entry, - - // reloc_arm_vfp_cp_entry - Same as reloc_arm_cp_entry except the offset - // should be divided by 4. - reloc_arm_vfp_cp_entry, - - // reloc_arm_machine_cp_entry - Relocation of a ARM machine constantpool - // entry. - reloc_arm_machine_cp_entry, - - // reloc_arm_jt_base - PC relative relocation for jump tables whose - // addresses are kept locally in a map. - reloc_arm_jt_base, - - // reloc_arm_pic_jt - PIC jump table entry relocation: dest bb - jt base. - reloc_arm_pic_jt, - - // reloc_arm_branch - Branch address relocation. - reloc_arm_branch, - - // reloc_arm_movt - MOVT immediate relocation. - reloc_arm_movt, - - // reloc_arm_movw - MOVW immediate relocation. 
- reloc_arm_movw - }; - } -} - -#endif - diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 3dcc0df349d2..fa30ac31a30f 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -157,7 +157,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, return SDValue(); const ARMTargetLowering &TLI = - *static_cast<const ARMTargetLowering*>(DAG.getTarget().getTargetLowering()); + *DAG.getTarget().getSubtarget<ARMSubtarget>().getTargetLowering(); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index 13769dc8efe0..94b98e668470 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMSELECTIONDAGINFO_H -#define ARMSELECTIONDAGINFO_H +#ifndef LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H +#define LLVM_LIB_TARGET_ARM_ARMSELECTIONDAGINFO_H #include "MCTargetDesc/ARMAddressingModes.h" #include "llvm/Target/TargetSelectionDAGInfo.h" diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index c1b4562f4118..311afe909cf1 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -15,13 +15,14 @@ #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMJITInfo.h" +#include "ARMMachineFunctionInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" -#include "ARMMachineFunctionInfo.h" +#include "ARMTargetMachine.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -29,7 +30,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; @@ -50,11 +50,13 @@ static cl::opt<bool> UseFusedMulOps("arm-use-mulops", cl::init(true), cl::Hidden); +namespace { enum AlignMode { DefaultAlign, StrictAlign, NoStrictAlign }; +} static cl::opt<AlignMode> Align(cl::desc("Load/store alignment support"), @@ -101,11 +103,6 @@ static std::string computeDataLayout(ARMSubtarget &ST) { // Pointers are 32 bits and aligned to 32 bits. Ret += "-p:32:32"; - // On thumb, i16,i18 and i1 have natural aligment requirements, but we try to - // align to 32. - if (ST.isThumb()) - Ret += "-i1:8:32-i8:8:32-i16:16:32"; - // ABIs other than APCS have 64 bit integers with natural alignment. if (!ST.isAPCS_ABI()) Ret += "-i64:64"; @@ -122,10 +119,9 @@ static std::string computeDataLayout(ARMSubtarget &ST) { else Ret += "-v128:64:128"; - // On thumb and APCS, only try to align aggregates to 32 bits (the default is - // 64 bits). - if (ST.isThumb() || ST.isAPCS_ABI()) - Ret += "-a:0:32"; + // Try to align aggregates to 32 bits (the default is 64 bits, which has no + // particular hardware support on 32-bit ARM). + Ret += "-a:0:32"; // Integer registers are 32 bits. 
Ret += "-n32"; @@ -147,18 +143,18 @@ static std::string computeDataLayout(ARMSubtarget &ST) { ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); + initSubtargetFeatures(CPU, FS); return *this; } ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, TargetMachine &TM, - bool IsLittle, const TargetOptions &Options) + const std::string &FS, const ARMBaseTargetMachine &TM, + bool IsLittle) : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), - TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), + TargetTriple(TT), Options(TM.Options), TM(TM), DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), - TSInfo(DL), JITInfo(), + TSInfo(DL), InstrInfo(isThumb1Only() ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) : !isThumb() @@ -218,25 +214,9 @@ void ARMSubtarget::initializeEnvironment() { UnsafeFPMath = false; } -void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { - AttributeSet FnAttrs = MF->getFunction()->getAttributes(); - Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-cpu"); - Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex, - "target-features"); - std::string CPU = - !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : ""; - std::string FS = - !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : ""; - if (!FS.empty()) { - initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); - } -} - -void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { +void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (CPUString.empty()) { - if (isTargetIOS() && TargetTriple.getArchName().endswith("v7s")) + if (isTargetDarwin() && TargetTriple.getArchName().endswith("v7s")) // Default to the Swift CPU when targeting armv7s/thumbv7s. CPUString = "swift"; else @@ -246,8 +226,8 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Insert the architecture feature derived from the target triple into the // feature string. This is important for setting features that are implied // based on the architecture version. - std::string ArchFS = ARM_MC::ParseARMTriple(TargetTriple.getTriple(), - CPUString); + std::string ArchFS = + ARM_MC::ParseARMTriple(TargetTriple.getTriple(), CPUString); if (!FS.empty()) { if (!ArchFS.empty()) ArchFS = ArchFS + "," + FS.str(); @@ -266,31 +246,9 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Initialize scheduling itinerary for the specified CPU. 
InstrItins = getInstrItineraryForCPU(CPUString); - if (TargetABI == ARM_ABI_UNKNOWN) { - switch (TargetTriple.getEnvironment()) { - case Triple::Android: - case Triple::EABI: - case Triple::EABIHF: - case Triple::GNUEABI: - case Triple::GNUEABIHF: - TargetABI = ARM_ABI_AAPCS; - break; - default: - if ((isTargetIOS() && isMClass()) || - (TargetTriple.isOSBinFormatMachO() && - TargetTriple.getOS() == Triple::UnknownOS)) - TargetABI = ARM_ABI_AAPCS; - else - TargetABI = ARM_ABI_APCS; - break; - } - } - // FIXME: this is invalid for WindowsCE - if (isTargetWindows()) { - TargetABI = ARM_ABI_AAPCS; + if (isTargetWindows()) NoARM = true; - } if (isAAPCS_ABI()) stackAlignment = 8; @@ -300,46 +258,39 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { UseMovt = hasV6T2Ops() && ArmUseMOVT; if (isTargetMachO()) { - IsR9Reserved = ReserveR9 | !HasV6Ops; + IsR9Reserved = ReserveR9 || !HasV6Ops; SupportsTailCall = !isTargetIOS() || !getTargetTriple().isOSVersionLT(5, 0); } else { IsR9Reserved = ReserveR9; SupportsTailCall = !isThumb1Only(); } - switch (Align) { - case DefaultAlign: - // Assume pre-ARMv6 doesn't support unaligned accesses. - // - // ARMv6 may or may not support unaligned accesses depending on the - // SCTLR.U bit, which is architecture-specific. We assume ARMv6 - // Darwin and NetBSD targets support unaligned accesses, and others don't. - // - // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit - // which raises an alignment fault on unaligned accesses. Linux - // defaults this bit to 0 and handles it as a system-wide (not - // per-process) setting. It is therefore safe to assume that ARMv7+ - // Linux targets support unaligned accesses. The same goes for NaCl. - // - // The above behavior is consistent with GCC. - AllowsUnalignedMem = - (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || - isTargetNetBSD())) || - (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); - // The one exception is cortex-m0, which despite being v6, does not - // support unaligned accesses. Rather than make the above boolean - // expression even more obtuse, just override the value here. - if (isThumb1Only() && isMClass()) - AllowsUnalignedMem = false; - break; - case StrictAlign: - AllowsUnalignedMem = false; - break; - case NoStrictAlign: - AllowsUnalignedMem = true; - break; + if (Align == DefaultAlign) { + // Assume pre-ARMv6 doesn't support unaligned accesses. + // + // ARMv6 may or may not support unaligned accesses depending on the + // SCTLR.U bit, which is architecture-specific. We assume ARMv6 + // Darwin and NetBSD targets support unaligned accesses, and others don't. + // + // ARMv7 always has SCTLR.U set to 1, but it has a new SCTLR.A bit + // which raises an alignment fault on unaligned accesses. Linux + // defaults this bit to 0 and handles it as a system-wide (not + // per-process) setting. It is therefore safe to assume that ARMv7+ + // Linux targets support unaligned accesses. The same goes for NaCl. + // + // The above behavior is consistent with GCC. + AllowsUnalignedMem = + (hasV7Ops() && (isTargetLinux() || isTargetNaCl() || + isTargetNetBSD())) || + (hasV6Ops() && (isTargetMachO() || isTargetNetBSD())); + } else { + AllowsUnalignedMem = !(Align == StrictAlign); } + // No v6M core supports unaligned memory access (v6M ARM ARM A3.2) + if (isV6M()) + AllowsUnalignedMem = false; + switch (IT) { case DefaultIT: RestrictIT = hasV8Ops() ? 
true : false; @@ -359,6 +310,15 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { UseNEONForSinglePrecisionFP = true; } +bool ARMSubtarget::isAPCS_ABI() const { + assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_APCS; +} +bool ARMSubtarget::isAAPCS_ABI() const { + assert(TM.TargetABI != ARMBaseTargetMachine::ARM_ABI_UNKNOWN); + return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS; +} + /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. bool ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, @@ -366,11 +326,7 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, if (RelocM == Reloc::Static) return false; - // Materializable GVs (in JIT lazy compilation mode) do not require an extra - // load from stub. - bool isDecl = GV->hasAvailableExternallyLinkage(); - if (GV->isDeclaration() && !GV->isMaterializable()) - isDecl = true; + bool isDecl = GV->isDeclarationForLinker(); if (!isTargetMachO()) { // Extra load is needed for all externally visible. @@ -413,12 +369,11 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, } unsigned ARMSubtarget::getMispredictionPenalty() const { - return SchedModel->MispredictPenalty; + return SchedModel.MispredictPenalty; } bool ARMSubtarget::hasSinCos() const { - return getTargetTriple().getOS() == Triple::IOS && - !getTargetTriple().isOSVersionLT(7, 0); + return getTargetTriple().isiOS() && !getTargetTriple().isOSVersionLT(7, 0); } // This overrides the PostRAScheduler bit in the SchedModel for any CPU. @@ -426,7 +381,7 @@ bool ARMSubtarget::enablePostMachineScheduler() const { return (!isThumb() || hasThumb2()); } -bool ARMSubtarget::enableAtomicExpandLoadLinked() const { +bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier() && !isThumb1Only(); } diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index f8283b08d485..dbacd4d6aada 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -11,21 +11,19 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMSUBTARGET_H -#define ARMSUBTARGET_H +#ifndef LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H +#define LLVM_LIB_TARGET_ARM_ARMSUBTARGET_H #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMJITInfo.h" #include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" #include "Thumb1FrameLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" -#include "ARMJITInfo.h" -#include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" @@ -39,12 +37,13 @@ namespace llvm { class GlobalValue; class StringRef; class TargetOptions; +class ARMBaseTargetMachine; class ARMSubtarget : public ARMGenSubtargetInfo { protected: enum ARMProcFamilyEnum { Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15, - CortexR5, Swift, CortexA53, CortexA57, Krait + CortexA17, CortexR5, Swift, CortexA53, CortexA57, Krait, }; enum ARMProcClassEnum { None, AClass, RClass, MClass @@ -188,7 +187,7 @@ protected: /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see - /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). + /// ARMTargetLowering::allowsMisalignedMemoryAccesses(). 
bool AllowsUnalignedMem; /// RestrictIT - If true, the subtarget disallows generation of deprecated IT @@ -219,7 +218,7 @@ protected: Triple TargetTriple; /// SchedModel - Processor specific instruction costs. - const MCSchedModel *SchedModel; + MCSchedModel SchedModel; /// Selected instruction itineraries (one entry per itinerary class.) InstrItineraryData InstrItins; @@ -227,19 +226,14 @@ protected: /// Options passed via command line that could influence the target const TargetOptions &Options; - public: - enum { - ARM_ABI_UNKNOWN, - ARM_ABI_APCS, - ARM_ABI_AAPCS // ARM EABI - } TargetABI; + const ARMBaseTargetMachine &TM; +public: /// This constructor initializes the data members to match that /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, TargetMachine &TM, bool IsLittle, - const TargetOptions &Options); + const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. @@ -250,27 +244,30 @@ protected: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - /// \brief Reset the features for the ARM target. - void resetSubtargetFeatures(const MachineFunction *MF) override; - /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); - const DataLayout *getDataLayout() const { return &DL; } - const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } - ARMJITInfo *getJITInfo() { return &JITInfo; } - const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); } - const ARMTargetLowering *getTargetLowering() const { return &TLInfo; } - const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } - const ARMBaseRegisterInfo *getRegisterInfo() const { + const DataLayout *getDataLayout() const override { return &DL; } + const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { + return &TSInfo; + } + const ARMBaseInstrInfo *getInstrInfo() const override { + return InstrInfo.get(); + } + const ARMTargetLowering *getTargetLowering() const override { + return &TLInfo; + } + const ARMFrameLowering *getFrameLowering() const override { + return FrameLowering.get(); + } + const ARMBaseRegisterInfo *getRegisterInfo() const override { return &InstrInfo->getRegisterInfo(); } private: const DataLayout DL; ARMSelectionDAGInfo TSInfo; - ARMJITInfo JITInfo; // Either Thumb1InstrInfo or Thumb2InstrInfo. 
std::unique_ptr<ARMBaseInstrInfo> InstrInfo; ARMTargetLowering TLInfo; @@ -278,7 +275,7 @@ private: std::unique_ptr<ARMFrameLowering> FrameLowering; void initializeEnvironment(); - void resetSubtargetFeatures(StringRef CPU, StringRef FS); + void initSubtargetFeatures(StringRef CPU, StringRef FS); public: void computeIssueWidth(); @@ -347,7 +344,7 @@ public: bool isTargetIOS() const { return TargetTriple.isiOS(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); } - bool isTargetNetBSD() const { return TargetTriple.getOS() == Triple::NetBSD; } + bool isTargetNetBSD() const { return TargetTriple.isOSNetBSD(); } bool isTargetWindows() const { return TargetTriple.isOSWindows(); } bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } @@ -388,14 +385,8 @@ public: return TargetTriple.getEnvironment() == Triple::Android; } - bool isAPCS_ABI() const { - assert(TargetABI != ARM_ABI_UNKNOWN); - return TargetABI == ARM_ABI_APCS; - } - bool isAAPCS_ABI() const { - assert(TargetABI != ARM_ABI_UNKNOWN); - return TargetABI == ARM_ABI_AAPCS; - } + bool isAPCS_ABI() const; + bool isAAPCS_ABI() const; bool isThumb() const { return InThumbMode; } bool isThumb1Only() const { return InThumbMode && !HasThumb2; } @@ -405,6 +396,10 @@ public: bool isRClass() const { return ARMProcClass == RClass; } bool isAClass() const { return ARMProcClass == AClass; } + bool isV6M() const { + return isThumb1Only() && isMClass(); + } + bool isR9Reserved() const { return IsR9Reserved; } bool useMovt(const MachineFunction &MF) const; @@ -428,12 +423,14 @@ public: /// True for some subtargets at > -O0. bool enablePostMachineScheduler() const override; - // enableAtomicExpandLoadLinked - True if we need to expand our atomics. - bool enableAtomicExpandLoadLinked() const override; + // enableAtomicExpand - True if we need to expand our atomics. + bool enableAtomicExpand() const override; - /// getInstrItins - Return the instruction itineraies based on subtarget + /// getInstrItins - Return the instruction itineraries based on subtarget /// selection.
- const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + const InstrItineraryData *getInstrItineraryData() const override { + return &InstrItins; + } /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index d85194b75ecb..7a8181b7528f 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -11,9 +11,11 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMTargetMachine.h" #include "ARMFrameLowering.h" +#include "ARMTargetMachine.h" +#include "ARMTargetObjectFile.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" @@ -42,6 +44,65 @@ extern "C" void LLVMInitializeARMTarget() { RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget); } +static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) + return make_unique<TargetLoweringObjectFileMachO>(); + if (TT.isOSWindows()) + return make_unique<TargetLoweringObjectFileCOFF>(); + return make_unique<ARMElfTargetObjectFile>(); +} + +static ARMBaseTargetMachine::ARMABI +computeTargetABI(const Triple &TT, StringRef CPU, + const TargetOptions &Options) { + if (Options.MCOptions.getABIName().startswith("aapcs")) + return ARMBaseTargetMachine::ARM_ABI_AAPCS; + else if (Options.MCOptions.getABIName().startswith("apcs")) + return ARMBaseTargetMachine::ARM_ABI_APCS; + + assert(Options.MCOptions.getABIName().empty() && + "Unknown target-abi option!"); + + ARMBaseTargetMachine::ARMABI TargetABI = + ARMBaseTargetMachine::ARM_ABI_UNKNOWN; + + // FIXME: This is duplicated code from the front end and should be unified. + if (TT.isOSBinFormatMachO()) { + if (TT.getEnvironment() == llvm::Triple::EABI || + (TT.getOS() == llvm::Triple::UnknownOS && + TT.getObjectFormat() == llvm::Triple::MachO) || + CPU.startswith("cortex-m")) { + TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; + } else { + TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; + } + } else if (TT.isOSWindows()) { + // FIXME: this is invalid for WindowsCE + TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; + } else { + // Select the default based on the platform. + switch (TT.getEnvironment()) { + case llvm::Triple::Android: + case llvm::Triple::GNUEABI: + case llvm::Triple::GNUEABIHF: + case llvm::Triple::EABIHF: + case llvm::Triple::EABI: + TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; + break; + case llvm::Triple::GNU: + TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; + break; + default: + if (TT.getOS() == llvm::Triple::NetBSD) + TargetABI = ARMBaseTargetMachine::ARM_ABI_APCS; + else + TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS; + break; + } + } + + return TargetABI; +} /// TargetMachine ctor - Create an ARM architecture model. 
/// @@ -51,7 +112,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, *this, isLittle, Options) { + TargetABI(computeTargetABI(Triple(TT), CPU, Options)), + TLOF(createTLOF(Triple(getTargetTriple()))), + Subtarget(TT, CPU, FS, *this, isLittle), isLittle(isLittle) { // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) @@ -59,6 +122,46 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, Subtarget.isTargetHardFloat() ? FloatABI::Hard : FloatABI::Soft; } +ARMBaseTargetMachine::~ARMBaseTargetMachine() {} + +const ARMSubtarget * +ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { + AttributeSet FnAttrs = F.getAttributes(); + Attribute CPUAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu"); + Attribute FSAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features"); + + std::string CPU = !CPUAttr.hasAttribute(Attribute::None) + ? CPUAttr.getValueAsString().str() + : TargetCPU; + std::string FS = !FSAttr.hasAttribute(Attribute::None) + ? FSAttr.getValueAsString().str() + : TargetFS; + + // FIXME: This is related to the code below to reset the target options, + // we need to know whether or not the soft float flag is set on the + // function before we can generate a subtarget. We also need to use + // it as a key for the subtarget since that can be the only difference + // between two functions. + Attribute SFAttr = + FnAttrs.getAttribute(AttributeSet::FunctionIndex, "use-soft-float"); + bool SoftFloat = !SFAttr.hasAttribute(Attribute::None) + ? SFAttr.getValueAsString() == "true" + : Options.UseSoftFloat; + + auto &I = SubtargetMap[CPU + FS + (SoftFloat ? "use-soft-float=true" + : "use-soft-float=false")]; + if (!I) { + // This needs to be done before we create a new subtarget since any + // creation will depend on the TM and the code generation flags on the + // function that reside in TargetOptions. + resetTargetOptions(F); + I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle); + } + return I.get(); +} + void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { // Add first the target-independent BasicTTI pass, then our ARM pass. This // allows the ARM pass to delegate to the target independent layer when @@ -147,9 +250,9 @@ public: void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; - bool addPreRegAlloc() override; - bool addPreSched2() override; - bool addPreEmitPass() override; + void addPreRegAlloc() override; + void addPreSched2() override; + void addPreEmitPass() override; }; } // namespace @@ -158,7 +261,10 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { } void ARMPassConfig::addIRPasses() { - addPass(createAtomicExpandLoadLinkedPass(TM)); + if (TM->Options.ThreadModel == ThreadModel::Single) + addPass(createLowerAtomicPass()); + else + addPass(createAtomicExpandPass(TM)); // Cmpxchg instructions are often used with a subsequent comparison to // determine whether it succeeded. 
We can exploit existing control-flow in @@ -188,7 +294,7 @@ bool ARMPassConfig::addInstSelector() { return false; } -bool ARMPassConfig::addPreRegAlloc() { +void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) addPass(createARMLoadStoreOptimizationPass(true)); if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9()) @@ -199,13 +305,11 @@ bool ARMPassConfig::addPreRegAlloc() { getARMSubtarget().hasNEON() && !DisableA15SDOptimization) { addPass(createA15SDOptimizerPass()); } - return true; } -bool ARMPassConfig::addPreSched2() { +void ARMPassConfig::addPreSched2() { if (getOptLevel() != CodeGenOpt::None) { addPass(createARMLoadStoreOptimizationPass()); - printAndVerify("After ARM load / store optimizer"); if (getARMSubtarget().hasNEON()) addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass)); @@ -226,11 +330,9 @@ bool ARMPassConfig::addPreSched2() { } if (getARMSubtarget().isThumb2()) addPass(createThumb2ITBlockPass()); - - return true; } -bool ARMPassConfig::addPreEmitPass() { +void ARMPassConfig::addPreEmitPass() { if (getARMSubtarget().isThumb2()) { if (!getARMSubtarget().prefers32BitThumb()) addPass(createThumb2SizeReductionPass()); @@ -241,13 +343,4 @@ bool ARMPassConfig::addPreEmitPass() { addPass(createARMOptimizeBarriersPass()); addPass(createARMConstantIslandPass()); - - return true; -} - -bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM, - JITCodeEmitter &JCE) { - // Machine code emitter pass for ARM. - PM.add(createARMJITCodeEmitterPass(*this, JCE)); - return false; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index b72b1df4af83..18cf5fa0fa06 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMTARGETMACHINE_H -#define ARMTARGETMACHINE_H +#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H +#define LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H #include "ARMInstrInfo.h" #include "ARMSubtarget.h" @@ -22,8 +22,19 @@ namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { +public: + enum ARMABI { + ARM_ABI_UNKNOWN, + ARM_ABI_APCS, + ARM_ABI_AAPCS // ARM EABI + } TargetABI; + protected: + std::unique_ptr<TargetLoweringObjectFile> TLOF; ARMSubtarget Subtarget; + bool isLittle; + mutable StringMap<std::unique_ptr<ARMSubtarget>> SubtargetMap; + public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -31,30 +42,10 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); + ~ARMBaseTargetMachine() override; const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } - const ARMBaseRegisterInfo *getRegisterInfo() const override { - return getSubtargetImpl()->getRegisterInfo(); - } - const ARMTargetLowering *getTargetLowering() const override { - return getSubtargetImpl()->getTargetLowering(); - } - const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { - return getSubtargetImpl()->getSelectionDAGInfo(); - } - const ARMBaseInstrInfo *getInstrInfo() const override { - return getSubtargetImpl()->getInstrInfo(); - } - const ARMFrameLowering *getFrameLowering() const override { - return getSubtargetImpl()->getFrameLowering(); - } - const InstrItineraryData *getInstrItineraryData() const override { - return &getSubtargetImpl()->getInstrItineraryData(); - } - const DataLayout *getDataLayout() const override { - return getSubtargetImpl()->getDataLayout(); - } 
- ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } + const ARMSubtarget *getSubtargetImpl(const Function &F) const override; /// \brief Register ARM analysis passes with a pass manager. void addAnalysisPasses(PassManagerBase &PM) override; @@ -62,7 +53,9 @@ public: // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; - bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE) override; + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } }; /// ARMTargetMachine - ARM target machine. diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h index c926421848e5..98e8763c4705 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.h +++ b/lib/Target/ARM/ARMTargetObjectFile.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM_TARGETOBJECTFILE_H -#define LLVM_TARGET_ARM_TARGETOBJECTFILE_H +#ifndef LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_ARM_ARMTARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index a2ace629baa1..ec834e8da599 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -49,7 +49,7 @@ public: ARMTTI(const ARMBaseTargetMachine *TM) : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), - TLI(TM->getTargetLowering()) { + TLI(TM->getSubtargetImpl()->getTargetLowering()) { initializeARMTTIPass(*PassRegistry::getPassRegistry()); } @@ -104,7 +104,7 @@ public: return 32; } - unsigned getMaximumUnrollFactor() const override { + unsigned getMaxInterleaveFactor() const override { // These are out of order CPUs: if (ST->isCortexA15() || ST->isSwift()) return 2; @@ -126,10 +126,11 @@ public: unsigned getAddressComputationCost(Type *Val, bool IsComplex) const override; - unsigned - getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Op1Info = OK_AnyValue, - OperandValueKind Op2Info = OK_AnyValue) const override; + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Op1Info = OK_AnyValue, + OperandValueKind Op2Info = OK_AnyValue, + OperandValueProperties Opd1PropInfo = OP_None, + OperandValueProperties Opd2PropInfo = OP_None) const override; unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const override; @@ -389,6 +390,13 @@ unsigned ARMTTI::getVectorInstrCost(unsigned Opcode, Type *ValTy, ValTy->getScalarSizeInBits() <= 32) return 3; + // Cross-class copies are expensive on many microarchitectures, + // so assume they are expensive by default. 
+ if ((Opcode == Instruction::InsertElement || + Opcode == Instruction::ExtractElement) && + ValTy->getVectorElementType()->isIntegerTy()) + return 3; + return TargetTransformInfo::getVectorInstrCost(Opcode, ValTy, Index); } @@ -497,9 +505,10 @@ unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } -unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, - OperandValueKind Op1Info, - OperandValueKind Op2Info) const { +unsigned ARMTTI::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, OperandValueKind Op1Info, + OperandValueKind Op2Info, OperandValueProperties Opd1PropInfo, + OperandValueProperties Opd2PropInfo) const { int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); @@ -555,8 +564,8 @@ unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, if (Idx != -1) return LT.first * CostTbl[Idx].Cost; - unsigned Cost = - TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); + unsigned Cost = TargetTransformInfo::getArithmeticInstrCost( + Opcode, Ty, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo); // This is somewhat of a hack. The problem that we are facing is that SROA // creates a sequence of shift, and, or instructions to construct values. diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index b62706c45fbf..6a00b28f37a7 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -129,12 +129,13 @@ public: class ARMAsmParser : public MCTargetAsmParser { MCSubtargetInfo &STI; - MCAsmParser &Parser; const MCInstrInfo &MII; const MCRegisterInfo *MRI; UnwindContext UC; ARMTargetStreamer &getTargetStreamer() { + assert(getParser().getStreamer().getTargetStreamer() && + "do not have a target streamer"); MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast<ARMTargetStreamer &>(TS); } @@ -163,7 +164,10 @@ class ARMAsmParser : public MCTargetAsmParser { // according to count of instructions in block. // ~0U if no active IT block. } ITState; - bool inITBlock() { return ITState.CurPosition != ~0U;} + bool inITBlock() { return ITState.CurPosition != ~0U; } + bool lastInITBlock() { + return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask); + } void forwardITPosition() { if (!inITBlock()) return; // Move to the next instruction in the IT block, if there is one. If not, @@ -173,22 +177,23 @@ class ARMAsmParser : public MCTargetAsmParser { ITState.CurPosition = ~0U; // Done with the IT block after this. 
} - - MCAsmParser &getParser() const { return Parser; } - MCAsmLexer &getLexer() const { return Parser.getLexer(); } - void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return Parser.Note(L, Msg, Ranges); + return getParser().Note(L, Msg, Ranges); } bool Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return Parser.Warning(L, Msg, Ranges); + return getParser().Warning(L, Msg, Ranges); } bool Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) { - return Parser.Error(L, Msg, Ranges); + return getParser().Error(L, Msg, Ranges); } + bool validatetLDMRegList(MCInst Inst, const OperandVector &Operands, + unsigned ListNo, bool IsARPop = false); + bool validatetSTMRegList(MCInst Inst, const OperandVector &Operands, + unsigned ListNo); + int tryParseRegister(); bool tryParseRegisterWithWriteBack(OperandVector &); int tryParseShiftRegister(OperandVector &); @@ -265,9 +270,15 @@ class ARMAsmParser : public MCTargetAsmParser { bool hasARM() const { return !(STI.getFeatureBits() & ARM::FeatureNoARM); } + bool hasThumb2DSP() const { + return STI.getFeatureBits() & ARM::FeatureDSPThumb2; + } + bool hasD16() const { + return STI.getFeatureBits() & ARM::FeatureD16; + } void SwitchMode() { - unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); + uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); } bool isMClass() const { @@ -290,6 +301,7 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &); OperandMatchResultTy parseProcIFlagsOperand(OperandVector &); OperandMatchResultTy parseMSRMaskOperand(OperandVector &); + OperandMatchResultTy parseBankedRegOperand(OperandVector &); OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low, int High); OperandMatchResultTy parsePKHLSLImm(OperandVector &O) { @@ -301,6 +313,7 @@ class ARMAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseSetEndImm(OperandVector &); OperandMatchResultTy parseShifterImm(OperandVector &); OperandMatchResultTy parseRotImm(OperandVector &); + OperandMatchResultTy parseModImm(OperandVector &); OperandMatchResultTy parseBitfield(OperandVector &); OperandMatchResultTy parsePostIdxReg(OperandVector &); OperandMatchResultTy parseAM3Offset(OperandVector &); @@ -314,7 +327,7 @@ class ARMAsmParser : public MCTargetAsmParser { void cvtThumbBranches(MCInst &Inst, const OperandVector &); bool validateInstruction(MCInst &Inst, const OperandVector &Ops); - bool processInstruction(MCInst &Inst, const OperandVector &Ops); + bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out); bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands); bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands); @@ -329,10 +342,9 @@ public: }; - ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) { + ARMAsmParser(MCSubtargetInfo & _STI, MCAsmParser & _Parser, + const MCInstrInfo &MII, const MCTargetOptions &Options) + : MCTargetAsmParser(), STI(_STI), MII(MII), UC(_Parser) { MCAsmParserExtension::Initialize(_Parser); // Cache the MCRegisterInfo. 
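Aside: the IT-block bookkeeping above (inITBlock/lastInITBlock/forwardITPosition) leans on the fact that the 4-bit IT mask carries a terminating '1' bit, so a block of n conditional instructions has countTrailingZeros(Mask) == 4 - n. A small self-contained sketch of that arithmetic, assuming only the terminator-bit convention.

#include <cstdio>

// Count trailing zero bits of a 4-bit mask (mirrors what the parser's
// countTrailingZeros call computes for ITState.Mask).
static unsigned countTrailingZeros4(unsigned Mask) {
  unsigned N = 0;
  while (N < 4 && !(Mask & (1u << N)))
    ++N;
  return N;
}

int main() {
  // Terminator bit at position 3, 2, 1, 0 encodes blocks of 1..4 slots.
  const unsigned Masks[] = {0x8, 0x4, 0x2, 0x1};
  for (unsigned Mask : Masks) {
    unsigned Len = 4 - countTrailingZeros4(Mask);
    // With CurPosition advancing 1..Len through the block,
    // lastInITBlock() reduces to: CurPosition == Len.
    printf("mask 0x%x -> %u conditional instruction(s)\n", Mask, Len);
  }
}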
@@ -359,7 +371,7 @@ public: bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, - unsigned &ErrorInfo, + uint64_t &ErrorInfo, bool MatchingInlineAsm) override; void onLabelParsed(MCSymbol *Symbol) override; }; @@ -383,6 +395,7 @@ class ARMOperand : public MCParsedAsmOperand { k_Memory, k_PostIndexRegister, k_MSRMask, + k_BankedReg, k_ProcIFlags, k_VectorIndex, k_Register, @@ -396,6 +409,7 @@ class ARMOperand : public MCParsedAsmOperand { k_ShiftedImmediate, k_ShifterImmediate, k_RotateImmediate, + k_ModifiedImmediate, k_BitfieldDescriptor, k_Token } Kind; @@ -435,6 +449,10 @@ class ARMOperand : public MCParsedAsmOperand { unsigned Val; }; + struct BankedRegOp { + unsigned Val; + }; + struct TokOp { const char *Data; unsigned Length; @@ -503,6 +521,11 @@ class ARMOperand : public MCParsedAsmOperand { unsigned Imm; }; + struct ModImmOp { + unsigned Bits; + unsigned Rot; + }; + struct BitfieldOp { unsigned LSB; unsigned Width; @@ -517,6 +540,7 @@ class ARMOperand : public MCParsedAsmOperand { struct ITMaskOp ITMask; struct IFlagsOp IFlags; struct MMaskOp MMask; + struct BankedRegOp BankedReg; struct TokOp Tok; struct RegOp Reg; struct VectorListOp VectorList; @@ -528,6 +552,7 @@ class ARMOperand : public MCParsedAsmOperand { struct RegShiftedRegOp RegShiftedReg; struct RegShiftedImmOp RegShiftedImm; struct RotImmOp RotImm; + struct ModImmOp ModImm; struct BitfieldOp Bitfield; }; @@ -585,6 +610,9 @@ public: case k_MSRMask: MMask = o.MMask; break; + case k_BankedReg: + BankedReg = o.BankedReg; + break; case k_ProcIFlags: IFlags = o.IFlags; break; @@ -600,6 +628,9 @@ public: case k_RotateImmediate: RotImm = o.RotImm; break; + case k_ModifiedImmediate: + ModImm = o.ModImm; + break; case k_BitfieldDescriptor: Bitfield = o.Bitfield; break; @@ -679,6 +710,11 @@ public: return MMask.Val; } + unsigned getBankedReg() const { + assert(Kind == k_BankedReg && "Invalid access!"); + return BankedReg.Val; + } + bool isCoprocNum() const { return Kind == k_CoprocNum; } bool isCoprocReg() const { return Kind == k_CoprocReg; } bool isCoprocOption() const { return Kind == k_CoprocOption; } @@ -1003,33 +1039,17 @@ public: } bool isAdrLabel() const { // If we have an immediate that's not a constant, treat it as a label - // reference needing a fixup. If it is a constant, but it can't fit - // into shift immediate encoding, we reject it. - if (isImm() && !isa<MCConstantExpr>(getImm())) return true; - else return (isARMSOImm() || isARMSOImmNeg()); - } - bool isARMSOImm() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ARM_AM::getSOImmVal(Value) != -1; - } - bool isARMSOImmNot() const { - if (!isImm()) return false; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - if (!CE) return false; - int64_t Value = CE->getValue(); - return ARM_AM::getSOImmVal(~Value) != -1; - } - bool isARMSOImmNeg() const { + // reference needing a fixup. + if (isImm() && !isa<MCConstantExpr>(getImm())) + return true; + + // If it is a constant, it must fit into a modified immediate encoding. if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Value = CE->getValue(); - // Only use this when not representable as a plain so_imm. 
- return ARM_AM::getSOImmVal(Value) == -1 && - ARM_AM::getSOImmVal(-Value) != -1; + return (ARM_AM::getSOImmVal(Value) != -1 || + ARM_AM::getSOImmVal(-Value) != -1); } bool isT2SOImm() const { if (!isImm()) return false; @@ -1074,6 +1094,22 @@ public: bool isRegShiftedReg() const { return Kind == k_ShiftedRegister; } bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; } bool isRotImm() const { return Kind == k_RotateImmediate; } + bool isModImm() const { return Kind == k_ModifiedImmediate; } + bool isModImmNot() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getSOImmVal(~Value) != -1; + } + bool isModImmNeg() const { + if (!isImm()) return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getSOImmVal(Value) == -1 && + ARM_AM::getSOImmVal(-Value) != -1; + } bool isBitfield() const { return Kind == k_BitfieldDescriptor; } bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } bool isPostIdxReg() const { @@ -1384,6 +1420,7 @@ public: } bool isMSRMask() const { return Kind == k_MSRMask; } + bool isBankedReg() const { return Kind == k_BankedReg; } bool isProcIFlags() const { return Kind == k_ProcIFlags; } // NEON operands. @@ -1601,9 +1638,18 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; - int64_t Value = CE->getValue(); - // i16 value in the range [0,255] or [0x0100, 0xff00] - return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00); + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi16splat(Value); + } + + bool isNEONi16splatNot() const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi16splat(~Value & 0xffff); } bool isNEONi32splat() const { @@ -1614,12 +1660,18 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); // Must be a constant. if (!CE) return false; - int64_t Value = CE->getValue(); - // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X. - return (Value >= 0 && Value < 256) || - (Value >= 0x0100 && Value <= 0xff00) || - (Value >= 0x010000 && Value <= 0xff0000) || - (Value >= 0x01000000 && Value <= 0xff000000); + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi32splat(Value); + } + + bool isNEONi32splatNot() const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + // Must be a constant. + if (!CE) return false; + unsigned Value = CE->getValue(); + return ARM_AM::isNEONi32splat(~Value); } bool isNEONByteReplicate(unsigned NumBytes) const { @@ -1655,6 +1707,7 @@ public: int64_t Value = CE->getValue(); // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X, // for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
+ // FIXME: This is probably wrong and a copy and paste from previous example return (Value >= 0 && Value < 256) || (Value >= 0x0100 && Value <= 0xff00) || (Value >= 0x010000 && Value <= 0xff0000) || @@ -1791,6 +1845,30 @@ public: Inst.addOperand(MCOperand::CreateImm(RotImm.Imm >> 3)); } + void addModImmOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + // Support for fixups (MCFixup) + if (isImm()) + return addImmOperands(Inst, N); + + Inst.addOperand(MCOperand::CreateImm(ModImm.Bits | (ModImm.Rot << 7))); + } + + void addModImmNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + uint32_t Enc = ARM_AM::getSOImmVal(~CE->getValue()); + Inst.addOperand(MCOperand::CreateImm(Enc)); + } + + void addModImmNegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + uint32_t Enc = ARM_AM::getSOImmVal(-CE->getValue()); + Inst.addOperand(MCOperand::CreateImm(Enc)); + } + void addBitfieldOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // Munge the lsb/width into a bitfield mask. @@ -1947,22 +2025,6 @@ public: Inst.addOperand(MCOperand::CreateImm(Memory.OffsetImm->getValue())); } - void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // The operand is actually a so_imm, but we have its bitwise - // negation in the assembly source, so twiddle it here. - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); - } - - void addARMSOImmNegOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - // The operand is actually a so_imm, but we have its - // negation in the assembly source, so twiddle it here. - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); - Inst.addOperand(MCOperand::CreateImm(-CE->getValue())); - } - void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); @@ -2334,6 +2396,11 @@ public: Inst.addOperand(MCOperand::CreateImm(unsigned(getMSRMask()))); } + void addBankedRegOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm(unsigned(getBankedReg()))); + } + void addProcIFlagsOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags()))); @@ -2378,10 +2445,16 @@ public: // The immediate encodes the type of constant as well as the value. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); unsigned Value = CE->getValue(); - if (Value >= 256) - Value = (Value >> 8) | 0xa00; - else - Value |= 0x800; + Value = ARM_AM::encodeNEONi16splat(Value); + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi16splatNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. 
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + Value = ARM_AM::encodeNEONi16splat(~Value & 0xffff); Inst.addOperand(MCOperand::CreateImm(Value)); } @@ -2390,12 +2463,16 @@ public: // The immediate encodes the type of constant as well as the value. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); unsigned Value = CE->getValue(); - if (Value >= 256 && Value <= 0xff00) - Value = (Value >> 8) | 0x200; - else if (Value > 0xffff && Value <= 0xff0000) - Value = (Value >> 16) | 0x400; - else if (Value > 0xffffff) - Value = (Value >> 24) | 0x600; + Value = ARM_AM::encodeNEONi32splat(Value); + Inst.addOperand(MCOperand::CreateImm(Value)); + } + + void addNEONi32splatNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The immediate encodes the type of constant as well as the value. + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + unsigned Value = CE->getValue(); + Value = ARM_AM::encodeNEONi32splat(~Value); Inst.addOperand(MCOperand::CreateImm(Value)); } @@ -2580,6 +2657,16 @@ public: return Op; } + static std::unique_ptr<ARMOperand> CreateModImm(unsigned Bits, unsigned Rot, + SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_ModifiedImmediate); + Op->ModImm.Bits = Bits; + Op->ModImm.Rot = Rot; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static std::unique_ptr<ARMOperand> CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) { auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor); @@ -2736,6 +2823,14 @@ public: Op->EndLoc = S; return Op; } + + static std::unique_ptr<ARMOperand> CreateBankedReg(unsigned Reg, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_BankedReg); + Op->BankedReg.Val = Reg; + Op->StartLoc = S; + Op->EndLoc = S; + return Op; + } }; } // end anonymous namespace. @@ -2769,6 +2864,9 @@ void ARMOperand::print(raw_ostream &OS) const { case k_MSRMask: OS << "<mask: " << getMSRMask() << ">"; break; + case k_BankedReg: + OS << "<banked reg: " << getBankedReg() << ">"; + break; case k_Immediate: getImm()->print(OS); break; @@ -2822,6 +2920,10 @@ void ARMOperand::print(raw_ostream &OS) const { case k_RotateImmediate: OS << "<ror " << " #" << (RotImm.Imm * 8) << ">"; break; + case k_ModifiedImmediate: + OS << "<mod_imm #" << ModImm.Bits << ", #" + << ModImm.Rot << ")>"; + break; case k_BitfieldDescriptor: OS << "<bitfield " << "lsb: " << Bitfield.LSB << ", width: " << Bitfield.Width << ">"; @@ -2871,8 +2973,9 @@ static unsigned MatchRegisterName(StringRef Name); bool ARMAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - StartLoc = Parser.getTok().getLoc(); - EndLoc = Parser.getTok().getEndLoc(); + const AsmToken &Tok = getParser().getTok(); + StartLoc = Tok.getLoc(); + EndLoc = Tok.getEndLoc(); RegNo = tryParseRegister(); return (RegNo == (unsigned)-1); @@ -2883,6 +2986,7 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo, /// returned. Otherwise return -1. /// int ARMAsmParser::tryParseRegister() { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return -1; @@ -2924,6 +3028,10 @@ int ARMAsmParser::tryParseRegister() { return Entry->getValue(); } + // Some FPUs only have 16 D registers, so D16-D31 are invalid + if (hasD16() && RegNum >= ARM::D16 && RegNum <= ARM::D31) + return -1; + Parser.Lex(); // Eat identifier token. 
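Aside: predicates such as hasD16() above are single-bit tests against the subtarget's feature bit vector, and the new D16..D31 rejection in tryParseRegister() is a typical use. A standalone sketch of the pattern; the feature names and bit positions here are illustrative, not LLVM's actual values.

#include <cstdint>
#include <cstdio>

enum : uint64_t {
  FeatureD16 = 1u << 0, // FPU provides only 16 D registers
  FeatureVFP4 = 1u << 1,
};

// Mirror of the register-range gating: D16..D31 are invalid when the
// subtarget only has 16 D registers. RegNum is the D-register index.
static bool acceptDReg(unsigned RegNum, uint64_t FeatureBits) {
  if ((FeatureBits & FeatureD16) && RegNum >= 16)
    return false;
  return RegNum < 32;
}

int main() {
  printf("%d\n", acceptDReg(20, FeatureVFP4));              // 1: accepted
  printf("%d\n", acceptDReg(20, FeatureVFP4 | FeatureD16)); // 0: rejected
}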
return RegNum; @@ -2935,6 +3043,7 @@ int ARMAsmParser::tryParseRegister() { // consumed in the process of trying to parse the shifter (i.e., when it is // indeed a shifter operand, but malformed). int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3037,6 +3146,7 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { /// TODO this is likely to change to allow different register types and or to /// parse for a specific register type. bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &RegTok = Parser.getTok(); int RegNo = tryParseRegister(); if (RegNo == -1) @@ -3118,9 +3228,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { return -1; switch (Name[1]) { default: return -1; - // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON) - case '0': return CoprocOp == 'p'? -1: 10; - case '1': return CoprocOp == 'p'? -1: 11; + // CP10 and CP11 are VFP/NEON and so vector instructions should be used. + // However, old cores (v5/v6) did use them in that way. + case '0': return 10; + case '1': return 11; case '2': return 12; case '3': return 13; case '4': return 14; @@ -3132,6 +3243,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { /// parseITCondCode - Try to parse a condition code for an IT instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseITCondCode(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3169,6 +3281,7 @@ ARMAsmParser::parseITCondCode(OperandVector &Operands) { /// number, the token is eaten and the operand is added to the operand list. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3177,6 +3290,9 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { int Num = MatchCoprocessorOperandName(Tok.getString(), 'p'); if (Num == -1) return MatchOperand_NoMatch; + // ARMv7 and v8 don't allow cp10/cp11 due to VFP/NEON specific instructions + if ((hasV7Ops() || hasV8Ops()) && (Num == 10 || Num == 11)) + return MatchOperand_NoMatch; Parser.Lex(); // Eat identifier token. Operands.push_back(ARMOperand::CreateCoprocNum(Num, S)); @@ -3188,6 +3304,7 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { /// number, the token is eaten and the operand is added to the operand list. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3206,6 +3323,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { /// coproc_option : '{' imm0_255 '}' ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); // If this isn't a '{', this isn't a coprocessor immediate operand. @@ -3283,6 +3401,7 @@ static unsigned getDRegFromQReg(unsigned QReg) { /// Parse a register list. 
bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Curly Brace"); SMLoc S = Parser.getTok().getLoc(); @@ -3414,6 +3533,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { // Helper function to parse the lane index for vector lists. ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { + MCAsmParser &Parser = getParser(); Index = 0; // Always return a defined index value. if (Parser.getTok().is(AsmToken::LBrac)) { Parser.Lex(); // Eat the '['. @@ -3465,6 +3585,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { // parse a vector register list ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseVectorList(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); VectorLaneTy LaneKind; unsigned LaneIndex; SMLoc S = Parser.getTok().getLoc(); @@ -3716,6 +3837,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) { /// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3787,6 +3909,7 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { /// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3838,6 +3961,7 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { /// parseProcIFlagsOperand - Try to parse iflags from CPS instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3872,6 +3996,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { /// parseMSRMaskOperand - Try to parse mask flags from MSR instruction. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3892,9 +4017,6 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { // should really only be allowed when writing a special register. Note // they get dropped in the MRS instruction reading a special register as // the SYSm field is only 8 bits. - // - // FIXME: the _g and _nzcvqg versions are only allowed if the processor - // includes the DSP extension but that is not checked. .Case("apsr", 0x800) .Case("apsr_nzcvq", 0x800) .Case("apsr_g", 0x400) @@ -3926,6 +4048,11 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { if (FlagsVal == ~0U) return MatchOperand_NoMatch; + if (!hasThumb2DSP() && (FlagsVal & 0x400)) + // The _g and _nzcvqg versions are only valid if the DSP extension is + // available. + return MatchOperand_NoMatch; + if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813) // basepri, basepri_max and faultmask only valid for V7m. 
return MatchOperand_NoMatch; @@ -3998,9 +4125,67 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { return MatchOperand_Success; } +/// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for +/// use in the MRS/MSR instructions added to support virtualization. +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + if (!Tok.is(AsmToken::Identifier)) + return MatchOperand_NoMatch; + StringRef RegName = Tok.getString(); + + // The values here come from B9.2.3 of the ARM ARM, where bits 4-0 are SysM + // and bit 5 is R. + unsigned Encoding = StringSwitch<unsigned>(RegName.lower()) + .Case("r8_usr", 0x00) + .Case("r9_usr", 0x01) + .Case("r10_usr", 0x02) + .Case("r11_usr", 0x03) + .Case("r12_usr", 0x04) + .Case("sp_usr", 0x05) + .Case("lr_usr", 0x06) + .Case("r8_fiq", 0x08) + .Case("r9_fiq", 0x09) + .Case("r10_fiq", 0x0a) + .Case("r11_fiq", 0x0b) + .Case("r12_fiq", 0x0c) + .Case("sp_fiq", 0x0d) + .Case("lr_fiq", 0x0e) + .Case("lr_irq", 0x10) + .Case("sp_irq", 0x11) + .Case("lr_svc", 0x12) + .Case("sp_svc", 0x13) + .Case("lr_abt", 0x14) + .Case("sp_abt", 0x15) + .Case("lr_und", 0x16) + .Case("sp_und", 0x17) + .Case("lr_mon", 0x1c) + .Case("sp_mon", 0x1d) + .Case("elr_hyp", 0x1e) + .Case("sp_hyp", 0x1f) + .Case("spsr_fiq", 0x2e) + .Case("spsr_irq", 0x30) + .Case("spsr_svc", 0x32) + .Case("spsr_abt", 0x34) + .Case("spsr_und", 0x36) + .Case("spsr_mon", 0x3c) + .Case("spsr_hyp", 0x3e) + .Default(~0U); + + if (Encoding == ~0U) + return MatchOperand_NoMatch; + + Parser.Lex(); // Eat identifier token. + Operands.push_back(ARMOperand::CreateBankedReg(Encoding, S)); + return MatchOperand_Success; +} + ARMAsmParser::OperandMatchResultTy ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, int High) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { Error(Parser.getTok().getLoc(), Op + " operand expected."); @@ -4048,6 +4233,7 @@ ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseSetEndImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -4077,6 +4263,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) { /// n == 32 encoded as n == 0. 
ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseShifterImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -4147,6 +4334,7 @@ ARMAsmParser::parseShifterImm(OperandVector &Operands) { /// ror #n 'n' in {0, 8, 16, 24} ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseRotImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) @@ -4192,7 +4380,130 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) { } ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseModImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); + MCAsmLexer &Lexer = getLexer(); + int64_t Imm1, Imm2; + + SMLoc S = Parser.getTok().getLoc(); + + // 1) A mod_imm operand can appear in the place of a register name: + // add r0, #mod_imm + // add r0, r0, #mod_imm + // to correctly handle the latter, we bail out as soon as we see an + // identifier. + // + // 2) Similarly, we do not want to parse into complex operands: + // mov r0, #mod_imm + // mov r0, :lower16:(_foo) + if (Parser.getTok().is(AsmToken::Identifier) || + Parser.getTok().is(AsmToken::Colon)) + return MatchOperand_NoMatch; + + // Hash (dollar) is optional as per the ARMARM + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) { + // Avoid parsing into complex operands (#:) + if (Lexer.peekTok().is(AsmToken::Colon)) + return MatchOperand_NoMatch; + + // Eat the hash (dollar) + Parser.Lex(); + } + + SMLoc Sx1, Ex1; + Sx1 = Parser.getTok().getLoc(); + const MCExpr *Imm1Exp; + if (getParser().parseExpression(Imm1Exp, Ex1)) { + Error(Sx1, "malformed expression"); + return MatchOperand_ParseFail; + } + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm1Exp); + + if (CE) { + // Immediate must fit within 32-bits + Imm1 = CE->getValue(); + if (Imm1 < INT32_MIN || Imm1 > UINT32_MAX) { + Error(Sx1, "immediate operand must be representable with 32 bits"); + return MatchOperand_ParseFail; + } + + int Enc = ARM_AM::getSOImmVal(Imm1); + if (Enc != -1 && Parser.getTok().is(AsmToken::EndOfStatement)) { + // We have a match! + Operands.push_back(ARMOperand::CreateModImm((Enc & 0xFF), + (Enc & 0xF00) >> 7, + Sx1, Ex1)); + return MatchOperand_Success; + } + + // We have parsed an immediate which is not for us, fallback to a plain + // immediate. This can happen for instruction aliases. For an example, + // ARMInstrInfo.td defines the alias [mov <-> mvn] which can transform + // a mov (mvn) with a mod_imm_neg/mod_imm_not operand into the opposite + // instruction with a mod_imm operand. The alias is defined such that the + // parser method is shared, that's why we have to do this here. + if (Parser.getTok().is(AsmToken::EndOfStatement)) { + Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1)); + return MatchOperand_Success; + } + } else { + // Operands like #(l1 - l2) can only be evaluated at a later stage (via an + // MCFixup). Fallback to a plain immediate. 
+ Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1)); + return MatchOperand_Success; + } + + // From this point onward, we expect the input to be a (#bits, #rot) pair + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Sx1, "expected modified immediate operand: #[0, 255], #even[0-30]"); + return MatchOperand_ParseFail; + } + + if (Imm1 & ~0xFF) { + Error(Sx1, "immediate operand must be a number in the range [0, 255]"); + return MatchOperand_ParseFail; + } + + // Eat the comma + Parser.Lex(); + + // Repeat for #rot + SMLoc Sx2, Ex2; + Sx2 = Parser.getTok().getLoc(); + + // Eat the optional hash (dollar) + if (Parser.getTok().is(AsmToken::Hash) || + Parser.getTok().is(AsmToken::Dollar)) + Parser.Lex(); + + const MCExpr *Imm2Exp; + if (getParser().parseExpression(Imm2Exp, Ex2)) { + Error(Sx2, "malformed expression"); + return MatchOperand_ParseFail; + } + + CE = dyn_cast<MCConstantExpr>(Imm2Exp); + + if (CE) { + Imm2 = CE->getValue(); + if (!(Imm2 & ~0x1E)) { + // We have a match! + Operands.push_back(ARMOperand::CreateModImm(Imm1, Imm2, S, Ex2)); + return MatchOperand_Success; + } + Error(Sx2, "immediate operand must be an even number in the range [0, 30]"); + return MatchOperand_ParseFail; + } else { + Error(Sx2, "constant expression expected"); + return MatchOperand_ParseFail; + } +} + +ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseBitfield(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S = Parser.getTok().getLoc(); // The bitfield descriptor is really two operands, the LSB and the width. if (Parser.getTok().isNot(AsmToken::Hash) && @@ -4269,6 +4580,7 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) { // This method must return MatchOperand_NoMatch without consuming any tokens // in the case where there is no match, as other alternatives take other // parse methods. + MCAsmParser &Parser = getParser(); AsmToken Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); bool haveEaten = false; @@ -4321,6 +4633,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) { // This method must return MatchOperand_NoMatch without consuming any tokens // in the case where there is no match, as other alternatives take other // parse methods. + MCAsmParser &Parser = getParser(); AsmToken Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); @@ -4458,6 +4771,7 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst, /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. bool ARMAsmParser::parseMemory(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); SMLoc S, E; assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); @@ -4649,6 +4963,7 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { /// return true if it parses a shift otherwise it returns false. bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, unsigned &Amount) { + MCAsmParser &Parser = getParser(); SMLoc Loc = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -4709,6 +5024,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, /// parseFPImm - A floating point immediate expression operand. ARMAsmParser::OperandMatchResultTy ARMAsmParser::parseFPImm(OperandVector &Operands) { + MCAsmParser &Parser = getParser(); // Anything that can accept a floating point constant as an operand // needs to go through here, as the regular parseExpression is // integer only.
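Aside: parseModImm() above deals with the A32 "modified immediate" form: a constant is encodable only if it is an 8-bit value rotated right by an even amount 0..30, packed as rot/2 in bits 11:8 and the value in bits 7:0. A standalone sketch of that encode/decode round trip, written to mirror what ARM_AM::getSOImmVal()/rotr32() compute (this is a re-implementation for illustration, not the LLVM code itself).

#include <cstdint>
#include <cstdio>

static uint32_t rotr32(uint32_t V, unsigned Amt) {
  Amt &= 31;
  return Amt ? (V >> Amt) | (V << (32 - Amt)) : V;
}

// Returns the 12-bit encoding (rot/2 in bits 11:8, value in bits 7:0),
// or -1 if the constant is not representable as a modified immediate.
static int getModImmVal(uint32_t Imm) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    uint32_t Bits = rotr32(Imm, (32 - Rot) & 31); // rotate left by Rot
    if (Bits <= 0xFF)
      return (int)(((Rot / 2) << 8) | Bits);
  }
  return -1;
}

int main() {
  const uint32_t Tests[] = {0xFF, 0x3FC, 0xFF000000, 0xF000000F, 0x101};
  for (uint32_t T : Tests) {
    int Enc = getModImmVal(T);
    if (Enc < 0) {
      printf("0x%08x: not a modified immediate\n", (unsigned)T);
      continue; // e.g. 0x101 spans 9 bits and can never fit
    }
    unsigned Rot = ((Enc >> 8) & 0xF) * 2;
    uint32_t Back = rotr32(Enc & 0xFF, Rot); // decode: value ror rot
    printf("0x%08x -> bits=0x%02x rot=%u -> 0x%08x\n", (unsigned)T,
           Enc & 0xFF, Rot, (unsigned)Back);
  }
}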
@@ -4789,6 +5105,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) { /// Parse an arm instruction operand. For now this parses the operand regardless /// of the mnemonic. bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { + MCAsmParser &Parser = getParser(); SMLoc S, E; // Check if the current operand has a custom associated parser, if so, try to @@ -4921,6 +5238,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { // parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e. // :lower16: and :upper16:. bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { + MCAsmParser &Parser = getParser(); RefKind = ARMMCExpr::VK_ARM_None; // consume an optional '#' (GNU compatibility) @@ -4936,15 +5254,52 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { return true; } + enum { + COFF = (1 << MCObjectFileInfo::IsCOFF), + ELF = (1 << MCObjectFileInfo::IsELF), + MACHO = (1 << MCObjectFileInfo::IsMachO) + }; + static const struct PrefixEntry { + const char *Spelling; + ARMMCExpr::VariantKind VariantKind; + uint8_t SupportedFormats; + } PrefixEntries[] = { + { "lower16", ARMMCExpr::VK_ARM_LO16, COFF | ELF | MACHO }, + { "upper16", ARMMCExpr::VK_ARM_HI16, COFF | ELF | MACHO }, + }; StringRef IDVal = Parser.getTok().getIdentifier(); - if (IDVal == "lower16") { - RefKind = ARMMCExpr::VK_ARM_LO16; - } else if (IDVal == "upper16") { - RefKind = ARMMCExpr::VK_ARM_HI16; - } else { + + const auto &Prefix = + std::find_if(std::begin(PrefixEntries), std::end(PrefixEntries), + [&IDVal](const PrefixEntry &PE) { + return PE.Spelling == IDVal; + }); + if (Prefix == std::end(PrefixEntries)) { Error(Parser.getTok().getLoc(), "unexpected prefix in operand"); return true; } + + uint8_t CurrentFormat; + switch (getContext().getObjectFileInfo()->getObjectFileType()) { + case MCObjectFileInfo::IsMachO: + CurrentFormat = MACHO; + break; + case MCObjectFileInfo::IsELF: + CurrentFormat = ELF; + break; + case MCObjectFileInfo::IsCOFF: + CurrentFormat = COFF; + break; + } + + if (~Prefix->SupportedFormats & CurrentFormat) { + Error(Parser.getTok().getLoc(), + "cannot represent relocation in the current file format"); + return true; + } + + RefKind = Prefix->VariantKind; Parser.Lex(); if (getLexer().isNot(AsmToken::Colon)) { @@ -4952,6 +5307,7 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) { return true; } Parser.Lex(); // Eat the last ':' + return false; } @@ -4984,7 +5340,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "fmuls" || Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || - Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic.startswith("vsel")) + Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || + Mnemonic.startswith("vsel")) return Mnemonic; // First, split out any predication code.
Ignore mnemonics we know aren't @@ -5089,7 +5446,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || - Mnemonic == "vrintm" || Mnemonic.startswith("aes") || + Mnemonic == "vrintm" || Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { // These mnemonics are never predicable @@ -5127,7 +5484,7 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // conditionally adding the cc_out in the first place because we need // to check the type of the parsed immediate operand. if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() && - !static_cast<ARMOperand &>(*Operands[4]).isARMSOImm() && + !static_cast<ARMOperand &>(*Operands[4]).isModImm() && static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() && static_cast<ARMOperand &>(*Operands[1]).getReg() == 0) return true; @@ -5271,7 +5628,7 @@ static bool isDataTypeToken(StringRef Tok) { static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } -static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features, +static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features, unsigned VariantID); static bool RequiresVFPRegListValidation(StringRef Inst, @@ -5296,6 +5653,7 @@ static bool RequiresVFPRegListValidation(StringRef Inst, /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { + MCAsmParser &Parser = getParser(); // FIXME: Can this be done via tablegen in some fashion? bool RequireVFPRegisterListCheck; bool AcceptSinglePrecisionOnly; @@ -5309,7 +5667,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // The generic tblgen'erated code does this later, at the start of // MatchInstructionImpl(), but that's too late for aliases that include // any sort of suffix. - unsigned AvailableFeatures = getAvailableFeatures(); + uint64_t AvailableFeatures = getAvailableFeatures(); unsigned AssemblerDialect = getParser().getAssemblerDialect(); applyMnemonicAliases(Name, AvailableFeatures, AssemblerDialect); @@ -5415,6 +5773,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Operands.push_back(ARMOperand::CreateImm( MCConstantExpr::Create(ProcessorIMod, getContext()), NameLoc, NameLoc)); + } else if (Mnemonic == "cps" && isMClass()) { + return Error(NameLoc, "instruction 'cps' requires effect for M-class"); } // Add the remaining tokens in the mnemonic. @@ -5546,6 +5906,48 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } + // If first 2 operands of a 3 operand instruction are the same + // then transform to 2 operand version of the same instruction + // e.g. 'adds r0, r0, #1' transforms to 'adds r0, #1' + // FIXME: We would really like to be able to tablegen'erate this. 
+ if (isThumbOne() && Operands.size() == 6 && + (Mnemonic == "add" || Mnemonic == "sub" || Mnemonic == "and" || + Mnemonic == "eor" || Mnemonic == "lsl" || Mnemonic == "lsr" || + Mnemonic == "asr" || Mnemonic == "adc" || Mnemonic == "sbc" || + Mnemonic == "ror" || Mnemonic == "orr" || Mnemonic == "bic")) { + ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); + ARMOperand &Op4 = static_cast<ARMOperand &>(*Operands[4]); + ARMOperand &Op5 = static_cast<ARMOperand &>(*Operands[5]); + + // If both registers are the same then remove one of them from + // the operand list. + if (Op3.isReg() && Op4.isReg() && Op3.getReg() == Op4.getReg()) { + // If 3rd operand (variable Op5) is a register and the instruction is adds/sub + // then do not transform as the backend already handles this instruction + // correctly. + if (!Op5.isReg() || !((Mnemonic == "add" && CarrySetting) || Mnemonic == "sub")) { + Operands.erase(Operands.begin() + 3); + if (Mnemonic == "add" && !CarrySetting) { + // Special case for 'add' (not 'adds') instruction must + // remove the CCOut operand as well. + Operands.erase(Operands.begin() + 1); + } + } + } + } + + // If instruction is 'add' and first two register operands + // use SP register, then remove one of the SP registers from + // the instruction. + // FIXME: We would really like to be able to tablegen'erate this. + if (isThumbOne() && Operands.size() == 5 && Mnemonic == "add" && !CarrySetting) { + ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); + ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); + if (Op2.isReg() && Op3.isReg() && Op2.getReg() == ARM::SP && Op3.getReg() == ARM::SP) { + Operands.erase(Operands.begin() + 2); + } + } + // GNU Assembler extension (compatibility) if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); @@ -5623,6 +6025,50 @@ static bool instIsBreakpoint(const MCInst &Inst) { } +bool ARMAsmParser::validatetLDMRegList(MCInst Inst, + const OperandVector &Operands, + unsigned ListNo, bool IsARPop) { + const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); + bool HasWritebackToken = Op.isToken() && Op.getToken() == "!"; + + bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP); + bool ListContainsLR = listContainsReg(Inst, ListNo, ARM::LR); + bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC); + + if (!IsARPop && ListContainsSP) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "SP may not be in the register list"); + else if (ListContainsPC && ListContainsLR) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "PC and LR may not be in the register list simultaneously"); + else if (inITBlock() && !lastInITBlock() && ListContainsPC) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "instruction must be outside of IT block or the last " + "instruction in an IT block"); + return false; +} + +bool ARMAsmParser::validatetSTMRegList(MCInst Inst, + const OperandVector &Operands, + unsigned ListNo) { + const ARMOperand &Op = static_cast<const ARMOperand &>(*Operands[ListNo]); + bool HasWritebackToken = Op.isToken() && Op.getToken() == "!"; + + bool ListContainsSP = listContainsReg(Inst, ListNo, ARM::SP); + bool ListContainsPC = listContainsReg(Inst, ListNo, ARM::PC); + + if (ListContainsSP && ListContainsPC) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "SP and PC may not be in the register list"); + else if (ListContainsSP) + return Error(Operands[ListNo + 
HasWritebackToken]->getStartLoc(), + "SP may not be in the register list"); + else if (ListContainsPC) + return Error(Operands[ListNo + HasWritebackToken]->getStartLoc(), + "PC may not be in the register list"); + return false; +} + // FIXME: We would really like to be able to tablegen'erate this. bool ARMAsmParser::validateInstruction(MCInst &Inst, const OperandVector &Operands) { @@ -5728,6 +6174,48 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "source operands must be sequential"); return false; } + case ARM::STR_PRE_IMM: + case ARM::STR_PRE_REG: + case ARM::STR_POST_IMM: + case ARM::STR_POST_REG: + case ARM::STRH_PRE: + case ARM::STRH_POST: + case ARM::STRB_PRE_IMM: + case ARM::STRB_PRE_REG: + case ARM::STRB_POST_IMM: + case ARM::STRB_POST_REG: { + // Rt must be different from Rn. + const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(1).getReg()); + const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg()); + + if (Rt == Rn) + return Error(Operands[3]->getStartLoc(), + "source register and base register can't be identical"); + return false; + } + case ARM::LDR_PRE_IMM: + case ARM::LDR_PRE_REG: + case ARM::LDR_POST_IMM: + case ARM::LDR_POST_REG: + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + case ARM::LDRB_PRE_IMM: + case ARM::LDRB_PRE_REG: + case ARM::LDRB_POST_IMM: + case ARM::LDRB_POST_REG: + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: { + // Rt must be different from Rn. + const unsigned Rt = MRI->getEncodingValue(Inst.getOperand(0).getReg()); + const unsigned Rn = MRI->getEncodingValue(Inst.getOperand(2).getReg()); + + if (Rt == Rn) + return Error(Operands[3]->getStartLoc(), + "destination register and base register can't be identical"); + return false; + } case ARM::SBFX: case ARM::UBFX: { // Width must be in range [1, 32-lsb]. @@ -5765,6 +6253,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "writeback operator '!' not allowed when base register " "in register list"); + if (validatetLDMRegList(Inst, Operands, 3)) + return true; break; } case ARM::LDMIA_UPD: @@ -5775,7 +6265,20 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, // UNPREDICTABLE on v7 upwards. Goodness knows what they did before. 
if (!hasV7Ops()) break; - // Fallthrough + if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) + return Error(Operands.back()->getStartLoc(), + "writeback register not allowed in register list"); + break; + case ARM::t2LDMIA: + case ARM::t2LDMDB: + if (validatetLDMRegList(Inst, Operands, 3)) + return true; + break; + case ARM::t2STMIA: + case ARM::t2STMDB: + if (validatetSTMRegList(Inst, Operands, 3)) + return true; + break; case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: case ARM::t2STMIA_UPD: @@ -5783,6 +6286,14 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) return Error(Operands.back()->getStartLoc(), "writeback register not allowed in register list"); + + if (Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) { + if (validatetLDMRegList(Inst, Operands, 3)) + return true; + } else { + if (validatetSTMRegList(Inst, Operands, 3)) + return true; + } break; } case ARM::sysLDMIA_UPD: @@ -5827,6 +6338,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or pc"); + if (validatetLDMRegList(Inst, Operands, 2, !isMClass())) + return true; break; } case ARM::tPUSH: { @@ -5835,6 +6348,8 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or lr"); + if (validatetSTMRegList(Inst, Operands, 2)) + return true; break; } case ARM::tSTMIA_UPD: { @@ -5851,6 +6366,9 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return Error(Operands[4]->getStartLoc(), "writeback operator '!' not allowed when base register " "in register list"); + + if (validatetSTMRegList(Inst, Operands, 4)) + return true; break; } case ARM::tADDrSP: { @@ -6171,7 +6689,8 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { } bool ARMAsmParser::processInstruction(MCInst &Inst, - const OperandVector &Operands) { + const OperandVector &Operands, + MCStreamer &Out) { switch (Inst.getOpcode()) { // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction. case ARM::LDRT_POST: @@ -6212,12 +6731,35 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // Alias for alternate form of 'ADR Rd, #imm' instruction. case ARM::ADDri: { if (Inst.getOperand(1).getReg() != ARM::PC || - Inst.getOperand(5).getReg() != 0) + Inst.getOperand(5).getReg() != 0 || + !(Inst.getOperand(2).isExpr() || Inst.getOperand(2).isImm())) return false; MCInst TmpInst; TmpInst.setOpcode(ARM::ADR); TmpInst.addOperand(Inst.getOperand(0)); - TmpInst.addOperand(Inst.getOperand(2)); + if (Inst.getOperand(2).isImm()) { + // Immediate (mod_imm) will be in its encoded form, we must unencode it + // before passing it to the ADR instruction. + unsigned Enc = Inst.getOperand(2).getImm(); + TmpInst.addOperand(MCOperand::CreateImm( + ARM_AM::rotr32(Enc & 0xFF, (Enc & 0xF00) >> 7))); + } else { + // Turn PC-relative expression into absolute expression. + // Reading PC provides the start of the current instruction + 8 and + // the transform to adr is biased by that. 
+ MCSymbol *Dot = getContext().CreateTempSymbol(); + Out.EmitLabel(Dot); + const MCExpr *OpExpr = Inst.getOperand(2).getExpr(); + const MCExpr *InstPC = MCSymbolRefExpr::Create(Dot, + MCSymbolRefExpr::VK_None, + getContext()); + const MCExpr *Const8 = MCConstantExpr::Create(8, getContext()); + const MCExpr *ReadPC = MCBinaryExpr::CreateAdd(InstPC, Const8, + getContext()); + const MCExpr *FixupAddr = MCBinaryExpr::CreateAdd(ReadPC, OpExpr, + getContext()); + TmpInst.addOperand(MCOperand::CreateExpr(FixupAddr)); + } TmpInst.addOperand(Inst.getOperand(3)); TmpInst.addOperand(Inst.getOperand(4)); Inst = TmpInst; @@ -8010,7 +8552,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } // Some high-register supporting Thumb1 encodings only allow both registers // to be from r0-r7 when in Thumb2. - else if (Opc == ARM::tADDhirr && isThumbOne() && + else if (Opc == ARM::tADDhirr && isThumbOne() && !hasV6MOps() && isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) return Match_RequiresThumb2; @@ -8028,10 +8570,10 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { } } -static const char *getSubtargetFeatureName(unsigned Val); +static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, - MCStreamer &Out, unsigned &ErrorInfo, + MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult; @@ -8039,7 +8581,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); switch (MatchResult) { - default: break; case Match_Success: // Context sensitive operand constraints aren't handled by the matcher, // so check them here. @@ -8057,7 +8598,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // encoding is selected. Loop on it while changes happen so the // individual transformations can chain off each other. E.g., // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8) - while (processInstruction(Inst, Operands)) + while (processInstruction(Inst, Operands, Out)) ; // Only after the instruction is fully processed, we can validate it @@ -8085,7 +8626,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Special case the error message for the very common case where only // a single subtarget feature is missing (Thumb vs. ARM, e.g.). 
std::string Msg = "instruction requires:"; - unsigned Mask = 1; + uint64_t Mask = 1; for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { if (ErrorInfo & Mask) { Msg += " "; @@ -8097,7 +8638,7 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, } case Match_InvalidOperand: { SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0U) { + if (ErrorInfo != ~0ULL) { if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); @@ -8174,6 +8715,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { const MCObjectFileInfo::Environment Format = getContext().getObjectFileInfo()->getObjectFileType(); bool IsMachO = Format == MCObjectFileInfo::IsMachO; + bool IsCOFF = Format == MCObjectFileInfo::IsCOFF; StringRef IDVal = DirectiveID.getIdentifier(); if (IDVal == ".word") @@ -8225,7 +8767,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { else if (IDVal == ".thumb_set") return parseDirectiveThumbSet(DirectiveID.getLoc()); - if (!IsMachO) { + if (!IsMachO && !IsCOFF) { if (IDVal == ".arch") return parseDirectiveArch(DirectiveID.getLoc()); else if (IDVal == ".cpu") @@ -8256,6 +8798,7 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) { /// ::= .short expression [, expression]* /// ::= .word expression [, expression]* bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; @@ -8285,6 +8828,7 @@ bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) { /// parseDirectiveThumb /// ::= .thumb bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { Error(L, "unexpected token in directive"); return false; @@ -8306,6 +8850,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { /// parseDirectiveARM /// ::= .arm bool ARMAsmParser::parseDirectiveARM(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::EndOfStatement)) { Error(L, "unexpected token in directive"); return false; @@ -8334,12 +8879,13 @@ void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) { /// parseDirectiveThumbFunc /// ::= .thumbfunc symbol_name bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { - const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo(); - bool isMachO = MAI->hasSubsectionsViaSymbols(); + MCAsmParser &Parser = getParser(); + const auto Format = getContext().getObjectFileInfo()->getObjectFileType(); + bool IsMachO = Format == MCObjectFileInfo::IsMachO; // Darwin asm has (optionally) function name after .thumb_func direction // ELF doesn't - if (isMachO) { + if (IsMachO) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::EndOfStatement)) { if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) { @@ -8356,7 +8902,8 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { } if (getLexer().isNot(AsmToken::EndOfStatement)) { - Error(L, "unexpected token in directive"); + Error(Parser.getTok().getLoc(), "unexpected token in directive"); + Parser.eatToEndOfStatement(); return false; } @@ -8367,6 +8914,7 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { /// parseDirectiveSyntax /// ::= .syntax unified | divided bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { Error(L, "unexpected token in .syntax directive"); @@ -8398,6 +8946,7 @@ bool 
ARMAsmParser::parseDirectiveSyntax(SMLoc L) { /// parseDirectiveCode /// ::= .code 16 | 32 bool ARMAsmParser::parseDirectiveCode(SMLoc L) { + MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Integer)) { Error(L, "unexpected token in .code directive"); @@ -8442,6 +8991,7 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) { /// parseDirectiveReq /// ::= name .req registername bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { + MCAsmParser &Parser = getParser(); Parser.Lex(); // Eat the '.req' token. unsigned Reg; SMLoc SRegLoc, ERegLoc; @@ -8460,7 +9010,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { Parser.Lex(); // Consume the EndOfStatement - if (RegisterReqs.GetOrCreateValue(Name, Reg).getValue() != Reg) { + if (!RegisterReqs.insert(std::make_pair(Name, Reg)).second) { Error(SRegLoc, "redefinition of '" + Name + "' does not match original."); return false; } @@ -8471,6 +9021,7 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { /// parseDirectiveUnreq /// ::= .unreq registername bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) { + MCAsmParser &Parser = getParser(); if (Parser.getTok().isNot(AsmToken::Identifier)) { Parser.eatToEndOfStatement(); Error(L, "unexpected input in .unreq directive."); @@ -8507,6 +9058,7 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) { /// ::= .eabi_attribute int, int [, "str"] /// ::= .eabi_attribute Tag_name, int [, "str"] bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { + MCAsmParser &Parser = getParser(); int64_t Tag; SMLoc TagLoc; TagLoc = Parser.getTok().getLoc(); @@ -8584,8 +9136,13 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { if (Tag == ARMBuildAttrs::compatibility) { if (Parser.getTok().isNot(AsmToken::Comma)) IsStringValue = false; - else - Parser.Lex(); + if (Parser.getTok().isNot(AsmToken::Comma)) { + Error(Parser.getTok().getLoc(), "comma expected"); + Parser.eatToEndOfStatement(); + return false; + } else { + Parser.Lex(); + } } if (IsStringValue) { @@ -8614,9 +9171,49 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) { bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { StringRef CPU = getParser().parseStringToEndOfStatement().trim(); getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU); + + if (!STI.isCPUStringValid(CPU)) { + Error(L, "Unknown CPU name"); + return false; + } + + // FIXME: This switches the CPU features globally, therefore it might + // happen that code you would not expect to assemble will.
For details + // see: http://llvm.org/bugs/show_bug.cgi?id=20757 + STI.InitMCProcessorInfo(CPU, ""); + STI.InitCPUSchedModel(CPU); + unsigned FB = ComputeAvailableFeatures(STI.getFeatureBits()); + setAvailableFeatures(FB); + return false; } +// FIXME: This is duplicated in getARMFPUFeatures() in +// tools/clang/lib/Driver/Tools.cpp +static const struct { + const unsigned Fpu; + const uint64_t Enabled; + const uint64_t Disabled; +} Fpus[] = { + {ARM::VFP, ARM::FeatureVFP2, ARM::FeatureNEON}, + {ARM::VFPV2, ARM::FeatureVFP2, ARM::FeatureNEON}, + {ARM::VFPV3, ARM::FeatureVFP3, ARM::FeatureNEON}, + {ARM::VFPV3_D16, ARM::FeatureVFP3 | ARM::FeatureD16, ARM::FeatureNEON}, + {ARM::VFPV4, ARM::FeatureVFP4, ARM::FeatureNEON}, + {ARM::VFPV4_D16, ARM::FeatureVFP4 | ARM::FeatureD16, ARM::FeatureNEON}, + {ARM::FPV5_D16, ARM::FeatureFPARMv8 | ARM::FeatureD16, + ARM::FeatureNEON | ARM::FeatureCrypto}, + {ARM::FP_ARMV8, ARM::FeatureFPARMv8, + ARM::FeatureNEON | ARM::FeatureCrypto}, + {ARM::NEON, ARM::FeatureNEON, 0}, + {ARM::NEON_VFPV4, ARM::FeatureVFP4 | ARM::FeatureNEON, 0}, + {ARM::NEON_FP_ARMV8, ARM::FeatureFPARMv8 | ARM::FeatureNEON, + ARM::FeatureCrypto}, + {ARM::CRYPTO_NEON_FP_ARMV8, + ARM::FeatureFPARMv8 | ARM::FeatureNEON | ARM::FeatureCrypto, 0}, + {ARM::SOFTVFP, 0, 0}, +}; + /// parseDirectiveFPU /// ::= .fpu str bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { @@ -8632,6 +9229,18 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { return false; } + for (const auto &Fpu : Fpus) { + if (Fpu.Fpu != ID) + continue; + + // Need to toggle features that should be on but are off and that + // should be off but are on. + uint64_t Toggle = (Fpu.Enabled & ~STI.getFeatureBits()) | + (Fpu.Disabled & STI.getFeatureBits()); + setAvailableFeatures(ComputeAvailableFeatures(STI.ToggleFeature(Toggle))); + break; + } + getTargetStreamer().emitFPU(ID); return false; } @@ -8698,6 +9307,7 @@ bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) { /// parseDirectivePersonality /// ::= .personality name bool ARMAsmParser::parseDirectivePersonality(SMLoc L) { + MCAsmParser &Parser = getParser(); bool HasExistingPersonality = UC.hasPersonality(); UC.recordPersonality(L); @@ -8761,6 +9371,7 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) { /// parseDirectiveSetFP /// ::= .setfp fpreg, spreg [, offset] bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { + MCAsmParser &Parser = getParser(); // Check the ordering of unwind directives if (!UC.hasFnStart()) { Error(L, ".fnstart must precede .setfp directive"); @@ -8838,6 +9449,7 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) { /// parseDirectivePad /// ::= .pad offset bool ARMAsmParser::parseDirectivePad(SMLoc L) { + MCAsmParser &Parser = getParser(); // Check the ordering of unwind directives if (!UC.hasFnStart()) { Error(L, ".fnstart must precede .pad directive"); @@ -8912,6 +9524,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { /// ::= .inst.n opcode [, ...] /// ::= .inst.w opcode [, ...]
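/// For example (illustrative encodings): ".inst.n 0xbf00" emits the 16-bit Thumb NOP, and ".inst.w 0xf3af8000" emits the 32-bit Thumb2 NOP.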
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) { + MCAsmParser &Parser = getParser(); int Width; if (isThumb()) { @@ -9008,7 +9621,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) { } if (!Section) { - getStreamer().InitSections(); + getStreamer().InitSections(false); Section = getStreamer().getCurrentSection().first; } @@ -9024,6 +9637,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) { /// parseDirectivePersonalityIndex /// ::= .personalityindex index bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { + MCAsmParser &Parser = getParser(); bool HasExistingPersonality = UC.hasPersonality(); UC.recordPersonalityIndex(L); @@ -9079,6 +9693,7 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) { /// parseDirectiveUnwindRaw /// ::= .unwind_raw offset, opcode [, opcode...] bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { + MCAsmParser &Parser = getParser(); if (!UC.hasFnStart()) { Parser.eatToEndOfStatement(); Error(L, ".fnstart must precede .unwind_raw directives"); @@ -9160,6 +9775,8 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) { /// parseDirectiveTLSDescSeq /// ::= .tlsdescseq tls-variable bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (getLexer().isNot(AsmToken::Identifier)) { TokError("expected variable after '.tlsdescseq' directive"); Parser.eatToEndOfStatement(); @@ -9184,6 +9801,7 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) { /// parseDirectiveMovSP /// ::= .movsp reg [, #offset] bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { + MCAsmParser &Parser = getParser(); if (!UC.hasFnStart()) { Parser.eatToEndOfStatement(); Error(L, ".fnstart must precede .movsp directives"); @@ -9247,6 +9865,7 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) { /// parseDirectiveObjectArch /// ::= .object_arch name bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { + MCAsmParser &Parser = getParser(); if (getLexer().isNot(AsmToken::Identifier)) { Error(getLexer().getLoc(), "unexpected token"); Parser.eatToEndOfStatement(); @@ -9303,6 +9922,8 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) { /// parseDirectiveThumbSet /// ::= .thumb_set name, value bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) { + MCAsmParser &Parser = getParser(); + StringRef Name; if (Parser.parseIdentifier(Name)) { TokError("expected identifier after '.thumb_set'"); @@ -9349,8 +9970,8 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_MATCHER_IMPLEMENTATION #include "ARMGenAsmMatcher.inc" -static const struct ExtMapEntry { - const char *Extension; +static const struct { + const char *Name; const unsigned ArchCheck; const uint64_t Features; } Extensions[] = { @@ -9381,46 +10002,47 @@ static const struct ExtMapEntry { /// parseDirectiveArchExtension /// ::= .arch_extension [no]feature bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { + MCAsmParser &Parser = getParser(); + if (getLexer().isNot(AsmToken::Identifier)) { Error(getLexer().getLoc(), "unexpected token"); Parser.eatToEndOfStatement(); return false; } - StringRef Extension = Parser.getTok().getString(); + StringRef Name = Parser.getTok().getString(); SMLoc ExtLoc = Parser.getTok().getLoc(); getLexer().Lex(); bool EnableFeature = true; - if (Extension.startswith_lower("no")) { + if (Name.startswith_lower("no")) { EnableFeature = false; - Extension = Extension.substr(2); + Name = Name.substr(2); } - for (unsigned EI = 0, EE = array_lengthof(Extensions); EI != EE; ++EI) { - if (Extensions[EI].Extension != Extension) + for (const auto 
&Extension : Extensions) { + if (Extension.Name != Name) continue; - unsigned FB = getAvailableFeatures(); - if ((FB & Extensions[EI].ArchCheck) != Extensions[EI].ArchCheck) { - Error(ExtLoc, "architectural extension '" + Extension + "' is not " + if (!Extension.Features) + report_fatal_error("unsupported architectural extension: " + Name); + + if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) { + Error(ExtLoc, "architectural extension '" + Name + "' is not " "allowed for the current base architecture"); return false; } - if (!Extensions[EI].Features) - report_fatal_error("unsupported architectural extension: " + Extension); - - if (EnableFeature) - FB |= ComputeAvailableFeatures(Extensions[EI].Features); - else - FB &= ~ComputeAvailableFeatures(Extensions[EI].Features); - - setAvailableFeatures(FB); + uint64_t ToggleFeatures = EnableFeature + ? (~STI.getFeatureBits() & Extension.Features) + : ( STI.getFeatureBits() & Extension.Features); + uint64_t Features = + ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures)); + setAvailableFeatures(Features); return false; } - Error(ExtLoc, "unknown architectural extension: " + Extension); + Error(ExtLoc, "unknown architectural extension: " + Name); Parser.eatToEndOfStatement(); return false; } @@ -9441,7 +10063,7 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, if (CE->getValue() == 0) return Match_Success; break; - case MCK_ARMSOImm: + case MCK_ModImm: if (Op.isImm()) { const MCExpr *SOExpr = Op.getImm(); int64_t Value; diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 9b5fa75fe2a7..2530640139ac 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -2,8 +2,7 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info) -tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter) -tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher) @@ -19,7 +18,6 @@ add_llvm_target(ARMCodeGen ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp - ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp @@ -29,7 +27,6 @@ add_llvm_target(ARMCodeGen ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp - ARMJITInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 4d4038deac7c..51faf692c88f 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -20,7 +20,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" -#include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include <vector> @@ -85,42 +84,34 @@ namespace { } namespace { -/// ARMDisassembler - ARM disassembler for all ARM platforms. +/// ARM disassembler for all ARM platforms. class ARMDisassembler : public MCDisassembler { public: - /// Constructor - Initializes the disassembler. 
- /// ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) { } - ~ARMDisassembler() { - } + ~ARMDisassembler() {} - /// getInstruction - See MCDisassembler. - DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject &region, uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const override; + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; }; -/// ThumbDisassembler - Thumb disassembler for all Thumb platforms. +/// Thumb disassembler for all Thumb platforms. class ThumbDisassembler : public MCDisassembler { public: - /// Constructor - Initializes the disassembler. - /// ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) : MCDisassembler(STI, Ctx) { } - ~ThumbDisassembler() { - } + ~ThumbDisassembler() {} - /// getInstruction - See MCDisassembler. - DecodeStatus getInstruction(MCInst &instr, uint64_t &size, - const MemoryObject &region, uint64_t address, - raw_ostream &vStream, - raw_ostream &cStream) const override; + DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size, + ArrayRef<uint8_t> Bytes, uint64_t Address, + raw_ostream &VStream, + raw_ostream &CStream) const override; private: mutable ITStatus ITBlock; @@ -185,8 +176,6 @@ static DecodeStatus DecodePredicateOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder); static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val, @@ -281,6 +270,8 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeDoubleRegLoad(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeDoubleRegStore(MCInst &Inst, unsigned Insn, @@ -412,104 +403,122 @@ static MCDisassembler *createThumbDisassembler(const Target &T, return new ThumbDisassembler(STI, Ctx); } -DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, - uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; +// Post-decoding checks +static DecodeStatus checkDecodedInstruction(MCInst &MI, uint64_t &Size, + uint64_t Address, raw_ostream &OS, + raw_ostream &CS, + uint32_t Insn, + DecodeStatus Result) +{ + switch (MI.getOpcode()) { + case ARM::HVC: { + // HVC is undefined if condition = 0xf, otherwise unpredictable + // if condition != 0xe + uint32_t Cond = (Insn >> 28) & 0xF; + if (Cond == 0xF) + return MCDisassembler::Fail; + if (Cond != 0xE) + return MCDisassembler::SoftFail; + return Result; + } + default: return Result; + } +} - uint8_t bytes[4]; +DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, + ArrayRef<uint8_t> Bytes, + uint64_t Address, raw_ostream &OS, + raw_ostream &CS) const { + CommentStream = &CS; assert(!(STI.getFeatureBits() & ARM::ModeThumb) && - "Asked to disassemble an ARM instruction but
Subtarget is in Thumb mode!"); + "Asked to disassemble an ARM instruction but Subtarget is in Thumb " + "mode!"); // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, bytes) == -1) { + if (Bytes.size() < 4) { Size = 0; return MCDisassembler::Fail; } // Encoded as a small-endian 32-bit word in the stream. - uint32_t insn = (bytes[3] << 24) | - (bytes[2] << 16) | - (bytes[1] << 8) | - (bytes[0] << 0); + uint32_t Insn = + (Bytes[3] << 24) | (Bytes[2] << 16) | (Bytes[1] << 8) | (Bytes[0] << 0); // Calling the auto-generated decoder function. - DecodeStatus result = decodeInstruction(DecoderTableARM32, MI, insn, - Address, this, STI); - if (result != MCDisassembler::Fail) { + DecodeStatus Result = + decodeInstruction(DecoderTableARM32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, Result); } // VFP and NEON instructions, similarly, are shared between ARM // and Thumb modes. MI.clear(); - result = decodeInstruction(DecoderTableVFP32, MI, insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = decodeInstruction(DecoderTableVFP32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableVFPV832, MI, insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = decodeInstruction(DecoderTableVFPV832, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableNEONData32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableNEONData32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction // definitions with Thumb2 where these instructions are predicable. if (!DecodePredicateOperand(MI, 0xE, Address, this)) return MCDisassembler::Fail; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableNEONLoadStore32, MI, insn, Address, + Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, Insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction // definitions with Thumb2 where these instructions are predicable. if (!DecodePredicateOperand(MI, 0xE, Address, this)) return MCDisassembler::Fail; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableNEONDup32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableNEONDup32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; // Add a fake predicate operand, because we share these instruction // definitions with Thumb2 where these instructions are predicable. 
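// (0xE here is the AL, "always execute", ARM condition code.)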
if (!DecodePredicateOperand(MI, 0xE, Address, this)) return MCDisassembler::Fail; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTablev8NEON32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTablev8NEON32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTablev8Crypto32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); @@ -681,55 +690,53 @@ void ThumbDisassembler::UpdateThumbVFPPredicate(MCInst &MI) const { } DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, - const MemoryObject &Region, + ArrayRef<uint8_t> Bytes, uint64_t Address, - raw_ostream &os, - raw_ostream &cs) const { - CommentStream = &cs; - - uint8_t bytes[4]; + raw_ostream &OS, + raw_ostream &CS) const { + CommentStream = &CS; assert((STI.getFeatureBits() & ARM::ModeThumb) && "Asked to disassemble in Thumb mode but Subtarget is in ARM mode!"); // We want to read exactly 2 bytes of data. - if (Region.readBytes(Address, 2, bytes) == -1) { + if (Bytes.size() < 2) { Size = 0; return MCDisassembler::Fail; } - uint16_t insn16 = (bytes[1] << 8) | bytes[0]; - DecodeStatus result = decodeInstruction(DecoderTableThumb16, MI, insn16, - Address, this, STI); - if (result != MCDisassembler::Fail) { + uint16_t Insn16 = (Bytes[1] << 8) | Bytes[0]; + DecodeStatus Result = + decodeInstruction(DecoderTableThumb16, MI, Insn16, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 2; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableThumbSBit16, MI, insn16, - Address, this, STI); - if (result) { + Result = decodeInstruction(DecoderTableThumbSBit16, MI, Insn16, Address, this, + STI); + if (Result) { Size = 2; bool InITBlock = ITBlock.instrInITBlock(); - Check(result, AddThumbPredicate(MI)); + Check(Result, AddThumbPredicate(MI)); AddThumb1SBit(MI, InITBlock); - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableThumb216, MI, insn16, - Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableThumb216, MI, Insn16, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 2; // Nested IT blocks are UNPREDICTABLE. Must be checked before we add // the Thumb predicate. if (MI.getOpcode() == ARM::t2IT && ITBlock.instrInITBlock()) - result = MCDisassembler::SoftFail; + Result = MCDisassembler::SoftFail; - Check(result, AddThumbPredicate(MI)); + Check(Result, AddThumbPredicate(MI)); // If we find an IT instruction, we need to parse its condition // code and mask operands so that we can apply them correctly @@ -741,115 +748,115 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ITBlock.setITState(Firstcond, Mask); } - return result; + return Result; } // We want to read exactly 4 bytes of data. 
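// A 32-bit Thumb instruction is stored as two little-endian 16-bit halfwords, most significant halfword first, so the word is assembled as (sketch, matching the shifts below): uint32_t Insn32 = (Bytes[1] << 24) | (Bytes[0] << 16) | (Bytes[3] << 8) | (Bytes[2] << 0);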
- if (Region.readBytes(Address, 4, bytes) == -1) { + if (Bytes.size() < 4) { Size = 0; return MCDisassembler::Fail; } - uint32_t insn32 = (bytes[3] << 8) | - (bytes[2] << 0) | - (bytes[1] << 24) | - (bytes[0] << 16); + uint32_t Insn32 = + (Bytes[3] << 8) | (Bytes[2] << 0) | (Bytes[1] << 24) | (Bytes[0] << 16); MI.clear(); - result = decodeInstruction(DecoderTableThumb32, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableThumb32, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; bool InITBlock = ITBlock.instrInITBlock(); - Check(result, AddThumbPredicate(MI)); + Check(Result, AddThumbPredicate(MI)); AddThumb1SBit(MI, InITBlock); - return result; + return Result; } MI.clear(); - result = decodeInstruction(DecoderTableThumb232, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableThumb232, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } - if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + if (fieldFromInstruction(Insn32, 28, 4) == 0xE) { MI.clear(); - result = decodeInstruction(DecoderTableVFP32, MI, insn32, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableVFP32, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; UpdateThumbVFPPredicate(MI); - return result; + return Result; } } MI.clear(); - result = decodeInstruction(DecoderTableVFPV832, MI, insn32, Address, this, STI); - if (result != MCDisassembler::Fail) { + Result = + decodeInstruction(DecoderTableVFPV832, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } - if (fieldFromInstruction(insn32, 28, 4) == 0xE) { + if (fieldFromInstruction(Insn32, 28, 4) == 0xE) { MI.clear(); - result = decodeInstruction(DecoderTableNEONDup32, MI, insn32, Address, - this, STI); - if (result != MCDisassembler::Fail) { + Result = decodeInstruction(DecoderTableNEONDup32, MI, Insn32, Address, this, + STI); + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } } - if (fieldFromInstruction(insn32, 24, 8) == 0xF9) { + if (fieldFromInstruction(Insn32, 24, 8) == 0xF9) { MI.clear(); - uint32_t NEONLdStInsn = insn32; + uint32_t NEONLdStInsn = Insn32; NEONLdStInsn &= 0xF0FFFFFF; NEONLdStInsn |= 0x04000000; - result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn, + Result = decodeInstruction(DecoderTableNEONLoadStore32, MI, NEONLdStInsn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } } - if (fieldFromInstruction(insn32, 24, 4) == 0xF) { + if (fieldFromInstruction(Insn32, 24, 4) == 0xF) { MI.clear(); - uint32_t NEONDataInsn = insn32; + uint32_t NEONDataInsn = Insn32; NEONDataInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONDataInsn |= (NEONDataInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 NEONDataInsn |= 0x12000000; // Set bits 28 and 25 - result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn, + Result = decodeInstruction(DecoderTableNEONData32, MI, NEONDataInsn, Address, this, STI); - if 
(result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - Check(result, AddThumbPredicate(MI)); - return result; + Check(Result, AddThumbPredicate(MI)); + return Result; } MI.clear(); - uint32_t NEONCryptoInsn = insn32; + uint32_t NEONCryptoInsn = Insn32; NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24 NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25 - result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn, + Result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } MI.clear(); - uint32_t NEONv8Insn = insn32; + uint32_t NEONv8Insn = Insn32; NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 - result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, + Result = decodeInstruction(DecoderTablev8NEON32, MI, NEONv8Insn, Address, this, STI); - if (result != MCDisassembler::Fail) { + if (Result != MCDisassembler::Fail) { Size = 4; - return result; + return Result; } } @@ -1015,7 +1022,11 @@ static const uint16_t DPRDecoderTable[] = { static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - if (RegNo > 31) + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasD16 = featureBits & ARM::FeatureD16; + + if (RegNo > 31 || (hasD16 && RegNo > 15)) return MCDisassembler::Fail; unsigned Register = DPRDecoderTable[RegNo]; @@ -1122,15 +1133,6 @@ static DecodeStatus DecodeCCOutOperand(MCInst &Inst, unsigned Val, return MCDisassembler::Success; } -static DecodeStatus DecodeSOImmOperand(MCInst &Inst, unsigned Val, - uint64_t Address, const void *Decoder) { - uint32_t imm = Val & 0xFF; - uint32_t rot = (Val & 0xF00) >> 7; - uint32_t rot_imm = (imm >> rot) | (imm << ((32-rot) & 0x1F)); - Inst.addOperand(MCOperand::CreateImm(rot_imm)); - return MCDisassembler::Success; -} - static DecodeStatus DecodeSORegImmOperand(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -2973,11 +2975,9 @@ static DecodeStatus DecodeVLD4DupInstruction(MCInst &Inst, unsigned Insn, if (size == 0x3) { if (align == 0) return MCDisassembler::Fail; - size = 4; align = 16; } else { if (size == 2) { - size = 1 << size; align *= 8; } else { size = 1 << size; @@ -3267,6 +3267,11 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, unsigned Rt = fieldFromInstruction(Insn, 12, 4); unsigned Rn = fieldFromInstruction(Insn, 16, 4); + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasMP = featureBits & ARM::FeatureMP; + bool hasV7Ops = featureBits & ARM::HasV7Ops; + if (Rn == 15) { switch (Inst.getOpcode()) { case ARM::t2LDRBs: @@ -3302,11 +3307,10 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, case ARM::t2LDRSHs: return MCDisassembler::Fail; case ARM::t2LDRHs: - // FIXME: this instruction is only available with MP extensions, - // this should be checked first but we don't have access to the - // feature bits here. 
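// Here the LDRH/LDRSB shifted-register forms are re-interpreted as hint encodings: PLDW requires the MP extensions and PLI requires ARMv7, which the hasMP/hasV7Ops checks below enforce.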
Inst.setOpcode(ARM::t2PLDWs); break; + case ARM::t2LDRSBs: + Inst.setOpcode(ARM::t2PLIs); default: break; } @@ -3314,8 +3318,14 @@ static DecodeStatus DecodeT2LoadShift(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2PLDs: - case ARM::t2PLDWs: + break; case ARM::t2PLIs: + if (!hasV7Ops) + return MCDisassembler::Fail; + break; + case ARM::t2PLDWs: + if (!hasV7Ops || !hasMP) + return MCDisassembler::Fail; break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) @@ -3341,6 +3351,12 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, unsigned imm = fieldFromInstruction(Insn, 0, 8); imm |= (U << 8); imm |= (Rn << 9); + unsigned add = fieldFromInstruction(Insn, 9, 1); + + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasMP = featureBits & ARM::FeatureMP; + bool hasV7Ops = featureBits & ARM::HasV7Ops; if (Rn == 15) { switch (Inst.getOpcode()) { @@ -3375,6 +3391,13 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2LDRSHi8: return MCDisassembler::Fail; + case ARM::t2LDRHi8: + if (!add) + Inst.setOpcode(ARM::t2PLDWi8); + break; + case ARM::t2LDRSBi8: + Inst.setOpcode(ARM::t2PLIi8); + break; default: break; } @@ -3382,9 +3405,15 @@ static DecodeStatus DecodeT2LoadImm8(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2PLDi8: + break; case ARM::t2PLIi8: - case ARM::t2PLDWi8: + if (!hasV7Ops) + return MCDisassembler::Fail; break; + case ARM::t2PLDWi8: + if (!hasV7Ops || !hasMP) + return MCDisassembler::Fail; + break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; @@ -3404,6 +3433,11 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, unsigned imm = fieldFromInstruction(Insn, 0, 12); imm |= (Rn << 13); + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasMP = (featureBits & ARM::FeatureMP); + bool hasV7Ops = (featureBits & ARM::HasV7Ops); + if (Rn == 15) { switch (Inst.getOpcode()) { case ARM::t2LDRi12: @@ -3438,7 +3472,10 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, case ARM::t2LDRSHi12: return MCDisassembler::Fail; case ARM::t2LDRHi12: - Inst.setOpcode(ARM::t2PLDi12); + Inst.setOpcode(ARM::t2PLDWi12); + break; + case ARM::t2LDRSBi12: + Inst.setOpcode(ARM::t2PLIi12); break; default: break; @@ -3447,9 +3484,15 @@ static DecodeStatus DecodeT2LoadImm12(MCInst &Inst, unsigned Insn, switch (Inst.getOpcode()) { case ARM::t2PLDi12: - case ARM::t2PLDWi12: + break; case ARM::t2PLIi12: + if (!hasV7Ops) + return MCDisassembler::Fail; break; + case ARM::t2PLDWi12: + if (!hasV7Ops || !hasMP) + return MCDisassembler::Fail; + break; default: if (!Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) return MCDisassembler::Fail; @@ -3507,6 +3550,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, unsigned U = fieldFromInstruction(Insn, 23, 1); int imm = fieldFromInstruction(Insn, 0, 12); + uint64_t featureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + bool hasV7Ops = (featureBits & ARM::HasV7Ops); + if (Rt == 15) { switch (Inst.getOpcode()) { case ARM::t2LDRBpci: @@ -3525,7 +3572,10 @@ static DecodeStatus DecodeT2LoadLabel(MCInst &Inst, unsigned Insn, switch(Inst.getOpcode()) { case ARM::t2PLDpci: + break; case ARM::t2PLIpci: + if (!hasV7Ops) + return MCDisassembler::Fail; break; default: if (!Check(S, 
DecodeGPRRegisterClass(Inst, Rt, Address, Decoder))) @@ -3974,7 +4024,85 @@ static DecodeStatus DecodeInstSyncBarrierOption(MCInst &Inst, unsigned Val, static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - if (!Val) return MCDisassembler::Fail; + DecodeStatus S = MCDisassembler::Success; + uint64_t FeatureBits = ((const MCDisassembler*)Decoder)->getSubtargetInfo() + .getFeatureBits(); + if (FeatureBits & ARM::FeatureMClass) { + unsigned ValLow = Val & 0xff; + + // Validate the SYSm value first. + switch (ValLow) { + case 0: // apsr + case 1: // iapsr + case 2: // eapsr + case 3: // xpsr + case 5: // ipsr + case 6: // epsr + case 7: // iepsr + case 8: // msp + case 9: // psp + case 16: // primask + case 20: // control + break; + case 17: // basepri + case 18: // basepri_max + case 19: // faultmask + if (!(FeatureBits & ARM::HasV7Ops)) + // Values basepri, basepri_max and faultmask are only valid for v7m. + return MCDisassembler::Fail; + break; + default: + return MCDisassembler::Fail; + } + + if (Inst.getOpcode() == ARM::t2MSR_M) { + unsigned Mask = fieldFromInstruction(Val, 10, 2); + if (!(FeatureBits & ARM::HasV7Ops)) { + // The ARMv6-M MSR bits {11-10} can be only 0b10, other values are + // unpredictable. + if (Mask != 2) + S = MCDisassembler::SoftFail; + } + else { + // The ARMv7-M architecture stores an additional 2-bit mask value in + // MSR bits {11-10}. The mask is used only with apsr, iapsr, eapsr and + // xpsr, it has to be 0b10 in other cases. Bit mask{1} indicates if + // the NZCVQ bits should be moved by the instruction. Bit mask{0} + // indicates the move for the GE{3:0} bits, the mask{0} bit can be set + // only if the processor includes the DSP extension. + if (Mask == 0 || (Mask != 2 && ValLow > 3) || + (!(FeatureBits & ARM::FeatureDSPThumb2) && (Mask & 1))) + S = MCDisassembler::SoftFail; + } + } + } else { + // A/R class + if (Val == 0) + return MCDisassembler::Fail; + } + Inst.addOperand(MCOperand::CreateImm(Val)); + return S; +} + +static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder) { + + unsigned R = fieldFromInstruction(Val, 5, 1); + unsigned SysM = fieldFromInstruction(Val, 0, 5); + + // The table of encodings for these banked registers comes from B9.2.3 of the + // ARM ARM. There are patterns, but nothing regular enough to make this logic + // neater. 
So by fiat, these values are UNPREDICTABLE: + if (!R) { + if (SysM == 0x7 || SysM == 0xf || SysM == 0x18 || SysM == 0x19 || + SysM == 0x1a || SysM == 0x1b) + return MCDisassembler::SoftFail; + } else { + if (SysM != 0xe && SysM != 0x10 && SysM != 0x12 && SysM != 0x14 && + SysM != 0x16 && SysM != 0x1c && SysM != 0x1e) + return MCDisassembler::SoftFail; + } + Inst.addOperand(MCOperand::CreateImm(Val)); return MCDisassembler::Success; } diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index 52d833893270..a64a8a970c05 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMDisassembler parent = ARM -required_libraries = ARMDesc ARMInfo MC Support +required_libraries = ARMDesc ARMInfo MCDisassembler Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 228fb5756ca8..16eea335261f 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -269,7 +269,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // expressed as a GPRPair, so we have to manually merge them. // FIXME: We would really like to be able to tablegen'erate this. case ARM::LDREXD: case ARM::STREXD: - case ARM::LDAEXD: case ARM::STLEXD: + case ARM::LDAEXD: case ARM::STLEXD: { const MCRegisterClass& MRC = MRI.getRegClass(ARM::GPRRegClassID); bool isStore = Opcode == ARM::STREXD || Opcode == ARM::STLEXD; unsigned Reg = MI->getOperand(isStore ? 1 : 0).getReg(); @@ -290,6 +290,23 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, printInstruction(&NewMI, O); return; } + break; + } + // B9.3.3 ERET (Thumb) + // For a target that has Virtualization Extensions, ERET is the preferred + // disassembly of SUBS PC, LR, #0 + case ARM::t2SUBS_PC_LR: { + if (MI->getNumOperands() == 3 && + MI->getOperand(0).isImm() && + MI->getOperand(0).getImm() == 0 && + (getAvailableFeatures() & ARM::FeatureVirtualization)) { + O << "\teret"; + printPredicateOperand(MI, 1, O); + printAnnotation(O, Annot); + return; + } + break; + } } printInstruction(MI, O); @@ -503,30 +520,6 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, // Addressing Mode #3 //===--------------------------------------------------------------------===// -void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(Op); - const MCOperand &MO2 = MI->getOperand(Op+1); - const MCOperand &MO3 = MI->getOperand(Op+2); - - O << markup("<mem:") << "["; - printRegName(O, MO1.getReg()); - O << "], " << markup(">"); - - if (MO2.getReg()) { - O << (char)ARM_AM::getAM3Op(MO3.getImm()); - printRegName(O, MO2.getReg()); - return; - } - - unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); - O << markup("<imm:") - << '#' - << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) - << ImmOffs - << markup(">"); -} - void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0) { @@ -568,13 +561,9 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, return; } - const MCOperand &MO3 = MI->getOperand(Op+2); - unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm()); - - if (IdxMode == ARMII::IndexModePost) { - printAM3PostIndexOp(MI, Op, O); - return; - } + assert(ARM_AM::getAM3IdxMode(MI->getOperand(Op + 2).getImm()) != + 
ARMII::IndexModePost && + "unexpected idxmode"); printAM3PreOrOffsetIndexOp(MI, Op, O, AlwaysPrintImm0); } @@ -807,52 +796,56 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, const MCOperand &Op = MI->getOperand(OpNum); unsigned SpecRegRBit = Op.getImm() >> 4; unsigned Mask = Op.getImm() & 0xf; + uint64_t FeatureBits = getAvailableFeatures(); - if (getAvailableFeatures() & ARM::FeatureMClass) { + if (FeatureBits & ARM::FeatureMClass) { unsigned SYSm = Op.getImm(); unsigned Opcode = MI->getOpcode(); - // For reads of the special registers ignore the "mask encoding" bits - // which are only for writes. - if (Opcode == ARM::t2MRS_M) - SYSm &= 0xff; + + // For writes, handle extended mask bits if the DSP extension is present. + if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::FeatureDSPThumb2)) { + switch (SYSm) { + case 0x400: O << "apsr_g"; return; + case 0xc00: O << "apsr_nzcvqg"; return; + case 0x401: O << "iapsr_g"; return; + case 0xc01: O << "iapsr_nzcvqg"; return; + case 0x402: O << "eapsr_g"; return; + case 0xc02: O << "eapsr_nzcvqg"; return; + case 0x403: O << "xpsr_g"; return; + case 0xc03: O << "xpsr_nzcvqg"; return; + } + } + + // Handle the basic 8-bit mask. + SYSm &= 0xff; + + if (Opcode == ARM::t2MSR_M && (FeatureBits & ARM::HasV7Ops)) { + // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an + // alias for MSR APSR_nzcvq. + switch (SYSm) { + case 0: O << "apsr_nzcvq"; return; + case 1: O << "iapsr_nzcvq"; return; + case 2: O << "eapsr_nzcvq"; return; + case 3: O << "xpsr_nzcvq"; return; + } + } + switch (SYSm) { default: llvm_unreachable("Unexpected mask value!"); - case 0: - case 0x800: O << "apsr"; return; // with _nzcvq bits is an alias for aspr - case 0x400: O << "apsr_g"; return; - case 0xc00: O << "apsr_nzcvqg"; return; - case 1: - case 0x801: O << "iapsr"; return; // with _nzcvq bits is an alias for iapsr - case 0x401: O << "iapsr_g"; return; - case 0xc01: O << "iapsr_nzcvqg"; return; - case 2: - case 0x802: O << "eapsr"; return; // with _nzcvq bits is an alias for eapsr - case 0x402: O << "eapsr_g"; return; - case 0xc02: O << "eapsr_nzcvqg"; return; - case 3: - case 0x803: O << "xpsr"; return; // with _nzcvq bits is an alias for xpsr - case 0x403: O << "xpsr_g"; return; - case 0xc03: O << "xpsr_nzcvqg"; return; - case 5: - case 0x805: O << "ipsr"; return; - case 6: - case 0x806: O << "epsr"; return; - case 7: - case 0x807: O << "iepsr"; return; - case 8: - case 0x808: O << "msp"; return; - case 9: - case 0x809: O << "psp"; return; - case 0x10: - case 0x810: O << "primask"; return; - case 0x11: - case 0x811: O << "basepri"; return; - case 0x12: - case 0x812: O << "basepri_max"; return; - case 0x13: - case 0x813: O << "faultmask"; return; - case 0x14: - case 0x814: O << "control"; return; + case 0: O << "apsr"; return; + case 1: O << "iapsr"; return; + case 2: O << "eapsr"; return; + case 3: O << "xpsr"; return; + case 5: O << "ipsr"; return; + case 6: O << "epsr"; return; + case 7: O << "iepsr"; return; + case 8: O << "msp"; return; + case 9: O << "psp"; return; + case 16: O << "primask"; return; + case 17: O << "basepri"; return; + case 18: O << "basepri_max"; return; + case 19: O << "faultmask"; return; + case 20: O << "control"; return; } } @@ -882,6 +875,42 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, } } +void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + uint32_t Banked = MI->getOperand(OpNum).getImm(); + uint32_t R = (Banked & 0x20) >> 5; 
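// The banked-register operand packs {R, SysM} into six bits: bit 5 (R) selects the SPSR view and bits 4:0 (SysM) select the register, so e.g. R = 1, SysM = 0x0e prints as "SPSR_fiq" in the tables below.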
+ uint32_t SysM = Banked & 0x1f; + + // Nothing much we can do about this, the encodings are specified in B9.2.3 of + // the ARM ARM v7C, and are all over the shop. + if (R) { + O << "SPSR_"; + + switch(SysM) { + case 0x0e: O << "fiq"; return; + case 0x10: O << "irq"; return; + case 0x12: O << "svc"; return; + case 0x14: O << "abt"; return; + case 0x16: O << "und"; return; + case 0x1c: O << "mon"; return; + case 0x1e: O << "hyp"; return; + default: llvm_unreachable("Invalid banked SPSR register"); + } + } + + assert(!R && "should have dealt with SPSR regs"); + const char *RegNames[] = { + "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", "", + "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", "lr_fiq", "", + "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", "sp_abt", "lr_und", "sp_und", + "", "", "", "", "lr_mon", "sp_mon", "elr_hyp", "sp_hyp" + }; + const char *Name = RegNames[SysM]; + assert(Name[0] && "invalid banked register operand"); + + O << Name; +} + void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); @@ -1289,6 +1318,52 @@ void ARMInstPrinter::printRotImmOperand(const MCInst *MI, unsigned OpNum, O << markup(">"); } +void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + MCOperand Op = MI->getOperand(OpNum); + + // Support for fixups (MCFixup) + if (Op.isExpr()) + return printOperand(MI, OpNum, O); + + unsigned Bits = Op.getImm() & 0xFF; + unsigned Rot = (Op.getImm() & 0xF00) >> 7; + + bool PrintUnsigned = false; + switch (MI->getOpcode()){ + case ARM::MOVi: + // Movs to PC should be treated unsigned + PrintUnsigned = (MI->getOperand(OpNum - 1).getReg() == ARM::PC); + break; + case ARM::MSRi: + // Movs to special registers should be treated unsigned + PrintUnsigned = true; + break; + } + + int32_t Rotated = ARM_AM::rotr32(Bits, Rot); + if (ARM_AM::getSOImmVal(Rotated) == Op.getImm()) { + // #rot has the least possible value + O << "#" << markup("<imm:"); + if (PrintUnsigned) + O << static_cast<uint32_t>(Rotated); + else + O << Rotated; + O << markup(">"); + return; + } + + // Explicit #bits, #rot implied + O << "#" + << markup("<imm:") + << Bits + << markup(">") + << ", #" + << markup("<imm:") + << Rot + << markup(">"); +} + void ARMInstPrinter::printFBits16(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << markup("<imm:") diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index f671fe49bb66..f179e017278e 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMINSTPRINTER_H -#define ARMINSTPRINTER_H +#ifndef LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H +#define LLVM_LIB_TARGET_ARM_INSTPRINTER_ARMINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -51,7 +51,6 @@ public: void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printAM3PostIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O); void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, raw_ostream &O, bool AlwaysPrintImm0); void printPostIdxImm8Operand(const MCInst *MI, unsigned OpNum, @@ -117,6 +116,7 @@ public: void printCPSIMod(const MCInst *MI, 
unsigned OpNum, raw_ostream &O); void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printBankedRegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -131,6 +131,7 @@ public: void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printRotImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printGPRPairOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index b6c85c2e9466..f0eed9b811d4 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H -#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMADDRESSINGMODES_H #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -575,6 +575,53 @@ namespace ARM_AM { return Val; } + // Generic validation for single-byte immediate (0X00, 00X0, etc). + static inline bool isNEONBytesplat(unsigned Value, unsigned Size) { + assert(Size >= 1 && Size <= 4 && "Invalid size"); + unsigned count = 0; + for (unsigned i = 0; i < Size; ++i) { + if (Value & 0xff) count++; + Value >>= 8; + } + return count == 1; + } + + /// Checks if Value is a correct immediate for instructions like VBIC/VORR. + static inline bool isNEONi16splat(unsigned Value) { + if (Value > 0xffff) + return false; + // i16 value with set bits only in one byte X0 or 0X. + return Value == 0 || isNEONBytesplat(Value, 2); + } + + // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR + static inline unsigned encodeNEONi16splat(unsigned Value) { + assert(isNEONi16splat(Value) && "Invalid NEON splat value"); + if (Value >= 0x100) + Value = (Value >> 8) | 0xa00; + else + Value |= 0x800; + return Value; + } + + /// Checks if Value is a correct immediate for instructions like VBIC/VORR. + static inline bool isNEONi32splat(unsigned Value) { + // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X. + return Value == 0 || isNEONBytesplat(Value, 4); + } + + /// Encode NEON 32 bits Splat immediate for instructions like VBIC/VORR. 
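/// For example (illustrative values): 0x0000ab00 encodes as (0xab | 0x200), 0x00ab0000 as (0xab | 0x400), and values below 0x100 are returned unchanged.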
+ static inline unsigned encodeNEONi32splat(unsigned Value) { + assert(isNEONi32splat(Value) && "Invalid NEON splat value"); + if (Value >= 0x100 && Value <= 0xff00) + Value = (Value >> 8) | 0x200; + else if (Value > 0xffff && Value <= 0xff0000) + Value = (Value >> 16) | 0x400; + else if (Value > 0xffffff) + Value = (Value >> 24) | 0x600; + return Value; + } + AMSubMode getLoadStoreMultipleSubMode(int Opcode); //===--------------------------------------------------------------------===// diff --git a/lib/Target/ARM/MCTargetDesc/ARMArchName.h b/lib/Target/ARM/MCTargetDesc/ARMArchName.h index 34b9fc126ff1..bc056737a82b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMArchName.h +++ b/lib/Target/ARM/MCTargetDesc/ARMArchName.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMARCHNAME_H -#define ARMARCHNAME_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMARCHNAME_H namespace llvm { namespace ARM { @@ -24,4 +24,4 @@ enum ArchKind { } // namespace ARM } // namespace llvm -#endif // ARMARCHNAME_H +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index 7acd9cc4d302..0b2e3b0e67bb 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -9,6 +9,10 @@ #include "MCTargetDesc/ARMMCTargetDesc.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "MCTargetDesc/ARMAsmBackend.h" +#include "MCTargetDesc/ARMAsmBackendDarwin.h" +#include "MCTargetDesc/ARMAsmBackendELF.h" +#include "MCTargetDesc/ARMAsmBackendWinCOFF.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "llvm/ADT/StringSwitch.h" @@ -35,164 +39,136 @@ namespace { class ARMELFObjectWriter : public MCELFObjectTargetWriter { public: ARMELFObjectWriter(uint8_t OSABI) - : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, - /*HasRelocationAddend*/ false) {} + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSABI, ELF::EM_ARM, + /*HasRelocationAddend*/ false) {} }; -class ARMAsmBackend : public MCAsmBackend { - const MCSubtargetInfo* STI; - bool isThumbMode; // Currently emitting Thumb code. - bool IsLittleEndian; // Big or little endian. -public: - ARMAsmBackend(const Target &T, const StringRef TT, bool IsLittle) - : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), - isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {} - - ~ARMAsmBackend() { - delete STI; - } - - unsigned getNumFixupKinds() const override { - return ARM::NumTargetFixupKinds; - } - - bool hasNOP() const { - return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; - } +const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined in + // ARMFixupKinds.h. 
+ // + // Name Offset (bits) Size (bits) Flags + {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_ldst_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_10", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_thumb_adr_pcrel_10", 0, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_adr_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cp", 0, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel}, + // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 + // - 19. + {"fixup_arm_movt_hi16", 0, 20, 0}, + {"fixup_arm_movw_lo16", 0, 20, 0}, + {"fixup_t2_movt_hi16", 0, 20, 0}, + {"fixup_t2_movw_lo16", 0, 20, 0}, + }; + const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds are defined in + // ARMFixupKinds.h. 
+ // + // Name Offset (bits) Size (bits) Flags + {"fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_ldst_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_pcrel_10", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_thumb_adr_pcrel_10", 8, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_adr_pcrel_12", 0, 32, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel}, + {"fixup_arm_thumb_cp", 8, 8, + MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, + {"fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel}, + // movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 + // - 19. + {"fixup_arm_movt_hi16", 12, 20, 0}, + {"fixup_arm_movw_lo16", 12, 20, 0}, + {"fixup_t2_movt_hi16", 12, 20, 0}, + {"fixup_t2_movw_lo16", 12, 20, 0}, + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; +} - const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override { - const static MCFixupKindInfo InfosLE[ARM::NumTargetFixupKinds] = { -// This table *must* be in the order that the fixup_* kinds are defined in -// ARMFixupKinds.h. 
-// -// Name Offset (bits) Size (bits) Flags -{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_condbl", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_blx", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cp", 0, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_thumb_bcc", 0, 8, MCFixupKindInfo::FKF_IsPCRel }, -// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. -{ "fixup_arm_movt_hi16", 0, 20, 0 }, -{ "fixup_arm_movw_lo16", 0, 20, 0 }, -{ "fixup_t2_movt_hi16", 0, 20, 0 }, -{ "fixup_t2_movw_lo16", 0, 20, 0 }, - }; - const static MCFixupKindInfo InfosBE[ARM::NumTargetFixupKinds] = { -// This table *must* be in the order that the fixup_* kinds are defined in -// ARMFixupKinds.h. 
-// -// Name Offset (bits) Size (bits) Flags -{ "fixup_arm_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_pcrel_10_unscaled", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_thumb_adr_pcrel_10",8, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_condbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbranch", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_uncondbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_condbl", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_blx", 8, 24, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_blx", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -{ "fixup_arm_thumb_cp", 8, 8, MCFixupKindInfo::FKF_IsPCRel | - MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_thumb_bcc", 8, 8, MCFixupKindInfo::FKF_IsPCRel }, -// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. -{ "fixup_arm_movt_hi16", 12, 20, 0 }, -{ "fixup_arm_movw_lo16", 12, 20, 0 }, -{ "fixup_t2_movt_hi16", 12, 20, 0 }, -{ "fixup_t2_movw_lo16", 12, 20, 0 }, - }; - - if (Kind < FirstTargetFixupKind) - return MCAsmBackend::getFixupKindInfo(Kind); - - assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && - "Invalid kind!"); - return (IsLittleEndian ? InfosLE : InfosBE)[Kind - FirstTargetFixupKind]; - } - - /// processFixupValue - Target hook to process the literal value of a fixup - /// if necessary. 
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFixup &Fixup, const MCFragment *DF, - const MCValue &Target, uint64_t &Value, - bool &IsResolved) override; - - - void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value, bool IsPCRel) const override; - - bool mayNeedRelaxation(const MCInst &Inst) const override; - - bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, - const MCRelaxableFragment *DF, - const MCAsmLayout &Layout) const override; - - void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; - - bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; - - void handleAssemblerFlag(MCAssemblerFlag Flag) override { - switch (Flag) { - default: break; - case MCAF_Code16: - setIsThumb(true); - break; - case MCAF_Code32: - setIsThumb(false); - break; - } +void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: + break; + case MCAF_Code16: + setIsThumb(true); + break; + case MCAF_Code32: + setIsThumb(false); + break; } - - unsigned getPointerSize() const { return 4; } - bool isThumb() const { return isThumbMode; } - void setIsThumb(bool it) { isThumbMode = it; } - bool isLittle() const { return IsLittleEndian; } -}; +} } // end anonymous namespace static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { - default: return Op; - case ARM::tBcc: return ARM::t2Bcc; - case ARM::tLDRpci: return ARM::t2LDRpci; - case ARM::tADR: return ARM::t2ADR; - case ARM::tB: return ARM::t2B; - case ARM::tCBZ: return ARM::tHINT; - case ARM::tCBNZ: return ARM::tHINT; + default: + return Op; + case ARM::tBcc: + return ARM::t2Bcc; + case ARM::tLDRpci: + return ARM::t2LDRpci; + case ARM::tADR: + return ARM::t2ADR; + case ARM::tB: + return ARM::t2B; + case ARM::tCBZ: + return ARM::tHINT; + case ARM::tCBNZ: + return ARM::tHINT; } } @@ -202,8 +178,7 @@ bool ARMAsmBackend::mayNeedRelaxation(const MCInst &Inst) const { return false; } -bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, - uint64_t Value, +bool ARMAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const { switch ((unsigned)Fixup.getKind()) { @@ -265,7 +240,7 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { Res.addOperand(MCOperand::CreateImm(14)); Res.addOperand(MCOperand::CreateReg(0)); return; - } + } // The rest of instructions we're relaxing have the same operands. // We just need to update to the proper opcode. @@ -276,11 +251,11 @@ void ARMAsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const { bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { const uint16_t Thumb1_16bitNopEncoding = 0x46c0; // using MOV r8,r8 const uint16_t Thumb2_16bitNopEncoding = 0xbf00; // NOP - const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 + const uint32_t ARMv4_NopEncoding = 0xe1a00000; // using MOV r0,r0 const uint32_t ARMv6T2_NopEncoding = 0xe320f000; // NOP if (isThumb()) { - const uint16_t nopEncoding = hasNOP() ? Thumb2_16bitNopEncoding - : Thumb1_16bitNopEncoding; + const uint16_t nopEncoding = + hasNOP() ? Thumb2_16bitNopEncoding : Thumb1_16bitNopEncoding; uint64_t NumNops = Count / 2; for (uint64_t i = 0; i != NumNops; ++i) OW->Write16(nopEncoding); @@ -289,18 +264,26 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { return true; } // ARM mode - const uint32_t nopEncoding = hasNOP() ? 
ARMv6T2_NopEncoding - : ARMv4_NopEncoding; + const uint32_t nopEncoding = + hasNOP() ? ARMv6T2_NopEncoding : ARMv4_NopEncoding; uint64_t NumNops = Count / 4; for (uint64_t i = 0; i != NumNops; ++i) OW->Write32(nopEncoding); // FIXME: should this function return false when unable to write exactly // 'Count' bytes with NOP encodings? switch (Count % 4) { - default: break; // No leftover bytes to write - case 1: OW->Write8(0); break; - case 2: OW->Write16(0); break; - case 3: OW->Write16(0); OW->Write8(0xa0); break; + default: + break; // No leftover bytes to write + case 1: + OW->Write8(0); + break; + case 2: + OW->Write16(0); + break; + case 3: + OW->Write16(0); + OW->Write8(0xa0); + break; } return true; @@ -313,8 +296,7 @@ static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) { uint32_t Swapped = (Value & 0xFFFF0000) >> 16; Swapped |= (Value & 0x0000FFFF) << 16; return Swapped; - } - else + } else return Value; } @@ -351,7 +333,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_movt_hi16: if (!IsPCRel) Value >>= 16; - // Fallthrough + // Fallthrough case ARM::fixup_arm_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned Lo12 = Value & 0x0FFF; @@ -363,7 +345,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_t2_movt_hi16: if (!IsPCRel) Value >>= 16; - // Fallthrough + // Fallthrough case ARM::fixup_t2_movw_lo16: { unsigned Hi4 = (Value & 0xF000) >> 12; unsigned i = (Value & 0x800) >> 11; @@ -379,7 +361,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_ldst_pcrel_12: // ARM PC-relative values are offset by 8. Value -= 4; - // FALLTHROUGH + // FALLTHROUGH case ARM::fixup_t2_ldst_pcrel_12: { // Offset by 4, adjusted by two due to the half-word ordering of thumb. Value -= 4; @@ -438,7 +420,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_blx: // These values don't encode the low two bits since they're always zero. // Offset by 8 just as above. - if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (const MCSymbolRefExpr *SRE = + dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) return 0; return 0xffffff & ((Value - 8) >> 2); @@ -447,17 +430,17 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, Value >>= 1; // Low bit is not encoded. 
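The movw/movt cases above split the 16-bit immediate into the instruction's imm4 (bits 16-19) and imm12 (bits 0-11) fields. A worked standalone version of that scatter, mirroring the Hi4/Lo12 computation in adjustFixupValue (sketch only, not the LLVM function itself):

#include <cassert>
#include <cstdint>

// Scatter a 16-bit MOVW/MOVT immediate into the ARM imm4:imm12 fields,
// as the fixup_arm_movw_lo16 / fixup_arm_movt_hi16 cases above do.
static uint32_t scatterMovImm16(uint32_t Value) {
  uint32_t Hi4 = (Value & 0xF000) >> 12; // top nibble -> bits 16-19
  uint32_t Lo12 = Value & 0x0FFF;        // low 12 bits -> bits 0-11
  return (Hi4 << 16) | Lo12;
}

int main() {
  // 0xABCD: imm4 = 0xA, imm12 = 0xBCD.
  assert(scatterMovImm16(0xABCD) == 0x000A0BCD);
  return 0;
}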
uint32_t out = 0; - bool I = Value & 0x800000; + bool I = Value & 0x800000; bool J1 = Value & 0x400000; bool J2 = Value & 0x200000; J1 ^= I; J2 ^= I; - out |= I << 26; // S bit - out |= !J1 << 13; // J1 bit - out |= !J2 << 11; // J2 bit - out |= (Value & 0x1FF800) << 5; // imm6 field - out |= (Value & 0x0007FF); // imm11 field + out |= I << 26; // S bit + out |= !J1 << 13; // J1 bit + out |= !J2 << 11; // J2 bit + out |= (Value & 0x1FF800) << 5; // imm10 field + out |= (Value & 0x0007FF); // imm11 field return swapHalfWords(out, IsLittleEndian); } @@ -498,7 +481,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits); uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | - (uint16_t)imm11Bits); + (uint16_t)imm11Bits); return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_blx: { @@ -515,7 +498,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, // Note that the halfwords are stored high first, low second; so we need // to transpose the fixup value here to map properly. uint32_t offset = (Value - 2) >> 2; - if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) + if (const MCSymbolRefExpr *SRE = + dyn_cast<MCSymbolRefExpr>(Fixup.getValue())) if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_TLSCALL) offset = 0; uint32_t signBit = (offset & 0x400000) >> 22; @@ -528,7 +512,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits); uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) | - ((uint16_t)imm10LBits) << 1); + ((uint16_t)imm10LBits) << 1); return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian); } case ARM::fixup_arm_thumb_cp: @@ -564,7 +548,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, case ARM::fixup_arm_pcrel_10: Value = Value - 4; // ARM fixups offset by an additional word and don't // need to adjust for the half-word ordering. - // Fall through. + // Fall through. case ARM::fixup_t2_pcrel_10: { // Offset by 4, adjusted by two due to the half-word ordering of thumb. Value = Value - 4; @@ -735,7 +719,8 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, bool IsPCRel) const { unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian); - if (!Value) return; // Doesn't change encoding. + if (!Value) + return; // Doesn't change encoding. unsigned Offset = Fixup.getOffset(); assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); @@ -757,80 +742,36 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, } } -namespace { -// FIXME: This should be in a separate file. -class ARMWinCOFFAsmBackend : public ARMAsmBackend { -public: - ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple) - : ARMAsmBackend(T, Triple, true) { } - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); - } -}; - -// FIXME: This should be in a separate file. -// ELF is an ELF of course...
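The Thumb2 fixup cases above all finish with swapHalfWords or joinHalfWords because a 32-bit Thumb2 instruction is emitted as two 16-bit halfwords, most-significant halfword first; on a little-endian target the assembled fixup value therefore has its halves exchanged before being OR'd into the instruction stream. A standalone restatement of the swap, using the same logic as the swapHalfWords helper in the hunks above:

#include <cassert>
#include <cstdint>

// Exchange the 16-bit halves of a 32-bit Thumb2 value on little-endian
// targets, where the high halfword of the instruction is emitted first.
static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) {
  if (!IsLittleEndian)
    return Value;
  return ((Value & 0xFFFF0000) >> 16) | ((Value & 0x0000FFFF) << 16);
}

int main() {
  assert(swapHalfWords(0xF7FFBFFE, true) == 0xBFFEF7FF);
  assert(swapHalfWords(0xF7FFBFFE, false) == 0xF7FFBFFE);
  return 0;
}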
-class ELFARMAsmBackend : public ARMAsmBackend { -public: - uint8_t OSABI; - ELFARMAsmBackend(const Target &T, const StringRef TT, - uint8_t OSABI, bool IsLittle) - : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) { } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARMELFObjectWriter(OS, OSABI, isLittle()); - } -}; - -// FIXME: This should be in a separate file. -class DarwinARMAsmBackend : public ARMAsmBackend { -public: - const MachO::CPUSubTypeARM Subtype; - DarwinARMAsmBackend(const Target &T, const StringRef TT, - MachO::CPUSubTypeARM st) - : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { - HasDataInCodeSupport = true; - } - - MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { - return createARMMachObjectWriter(OS, /*Is64Bit=*/false, - MachO::CPU_TYPE_ARM, - Subtype); - } -}; - -} // end anonymous namespace - MCAsmBackend *llvm::createARMAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU, - bool isLittle) { + const MCRegisterInfo &MRI, StringRef TT, + StringRef CPU, bool isLittle) { Triple TheTriple(TT); switch (TheTriple.getObjectFormat()) { - default: llvm_unreachable("unsupported object format"); + default: + llvm_unreachable("unsupported object format"); case Triple::MachO: { MachO::CPUSubTypeARM CS = - StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) - .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) - .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) - .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) - .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) - .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) - .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) - .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) - .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) - .Default(MachO::CPU_SUBTYPE_ARM_V7); - - return new DarwinARMAsmBackend(T, TT, CS); + StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName()) + .Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T) + .Cases("armv5e", "thumbv5e", MachO::CPU_SUBTYPE_ARM_V5TEJ) + .Cases("armv6", "thumbv6", MachO::CPU_SUBTYPE_ARM_V6) + .Cases("armv6m", "thumbv6m", MachO::CPU_SUBTYPE_ARM_V6M) + .Cases("armv7em", "thumbv7em", MachO::CPU_SUBTYPE_ARM_V7EM) + .Cases("armv7k", "thumbv7k", MachO::CPU_SUBTYPE_ARM_V7K) + .Cases("armv7m", "thumbv7m", MachO::CPU_SUBTYPE_ARM_V7M) + .Cases("armv7s", "thumbv7s", MachO::CPU_SUBTYPE_ARM_V7S) + .Default(MachO::CPU_SUBTYPE_ARM_V7); + + return new ARMAsmBackendDarwin(T, TT, CS); } case Triple::COFF: assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); - return new ARMWinCOFFAsmBackend(T, TT); + return new ARMAsmBackendWinCOFF(T, TT); case Triple::ELF: assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target"); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS()); - return new ELFARMAsmBackend(T, TT, OSABI, isLittle); + return new ARMAsmBackendELF(T, TT, OSABI, isLittle); } } @@ -847,14 +788,13 @@ MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T, } MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return createARMAsmBackend(T, MRI, TT, CPU, true); } MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T, - const MCRegisterInfo &MRI, - StringRef TT, StringRef CPU) { + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { return 
createARMAsmBackend(T, MRI, TT, CPU, false); } - diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h new file mode 100644 index 000000000000..f4f10821037e --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h @@ -0,0 +1,69 @@ +//===-- ARMAsmBackend.h - ARM Assembler Backend -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H +#define LLVM_LIB_TARGET_ARM_ARMASMBACKEND_H + +#include "MCTargetDesc/ARMFixupKinds.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCSubtargetInfo.h" + +using namespace llvm; + +namespace { + +class ARMAsmBackend : public MCAsmBackend { + const MCSubtargetInfo *STI; + bool isThumbMode; // Currently emitting Thumb code. + bool IsLittleEndian; // Big or little endian. +public: + ARMAsmBackend(const Target &T, StringRef TT, bool IsLittle) + : MCAsmBackend(), STI(ARM_MC::createARMMCSubtargetInfo(TT, "", "")), + isThumbMode(TT.startswith("thumb")), IsLittleEndian(IsLittle) {} + + ~ARMAsmBackend() override { delete STI; } + + unsigned getNumFixupKinds() const override { + return ARM::NumTargetFixupKinds; + } + + bool hasNOP() const { return (STI->getFeatureBits() & ARM::HasV6T2Ops) != 0; } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override; + + /// processFixupValue - Target hook to process the literal value of a fixup + /// if necessary. + void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + const MCValue &Target, uint64_t &Value, + bool &IsResolved) override; + + void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value, bool IsPCRel) const override; + + bool mayNeedRelaxation(const MCInst &Inst) const override; + + bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value, + const MCRelaxableFragment *DF, + const MCAsmLayout &Layout) const override; + + void relaxInstruction(const MCInst &Inst, MCInst &Res) const override; + + bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; + + void handleAssemblerFlag(MCAssemblerFlag Flag) override; + + unsigned getPointerSize() const { return 4; } + bool isThumb() const { return isThumbMode; } + void setIsThumb(bool it) { isThumbMode = it; } + bool isLittle() const { return IsLittleEndian; } +}; +} // end anonymous namespace + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h new file mode 100644 index 000000000000..3bd7ab73839a --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -0,0 +1,33 @@ +//===-- ARMAsmBackendDarwin.h ARM Asm Backend Darwin ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H +#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDDARWIN_H + +#include "llvm/Support/MachO.h" + +using namespace llvm; + +namespace { +class ARMAsmBackendDarwin : public ARMAsmBackend { +public: + const MachO::CPUSubTypeARM Subtype; + ARMAsmBackendDarwin(const Target &T, StringRef TT, MachO::CPUSubTypeARM st) + : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), Subtype(st) { + HasDataInCodeSupport = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, + Subtype); + } +}; +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h new file mode 100644 index 000000000000..4efd32515426 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -0,0 +1,27 @@ +//===-- ARMAsmBackendELF.h ARM Asm Backend ELF -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H +#define LLVM_LIB_TARGET_ARM_ELFARMASMBACKEND_H + +using namespace llvm; +namespace { +class ARMAsmBackendELF : public ARMAsmBackend { +public: + uint8_t OSABI; + ARMAsmBackendELF(const Target &T, StringRef TT, uint8_t OSABI, bool IsLittle) + : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMELFObjectWriter(OS, OSABI, isLittle()); + } +}; +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h new file mode 100644 index 000000000000..33be347b03ac --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -0,0 +1,26 @@ +//===-- ARMAsmBackendWinCOFF.h - ARM Asm Backend WinCOFF --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H +#define LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H + +using namespace llvm; + +namespace { +class ARMAsmBackendWinCOFF : public ARMAsmBackend { +public: + ARMAsmBackendWinCOFF(const Target &T, StringRef Triple) + : ARMAsmBackend(T, Triple, true) {} + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); + } +}; +} + +#endif diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 1686d76b8e1b..4289a73e9d6b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -14,8 +14,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMBASEINFO_H -#define ARMBASEINFO_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMBASEINFO_H #include "ARMMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index a86601b6bb5b..a821a6b0b532 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -51,7 +51,7 @@ ARMELFObjectWriter::~ARMELFObjectWriter() {} bool ARMELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD, unsigned Type) const { - // FIXME: This is extremelly conservative. This really needs to use a + // FIXME: This is extremely conservative. This really needs to use a // whitelist with a clear explanation for why each realocation needs to // point to the symbol, not to the section. switch (Type) { @@ -102,7 +102,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case ARM::fixup_arm_uncondbl: switch (Modifier) { case MCSymbolRefExpr::VK_PLT: - Type = ELF::R_ARM_PLT32; + Type = ELF::R_ARM_CALL; break; case MCSymbolRefExpr::VK_ARM_TLSCALL: Type = ELF::R_ARM_TLS_CALL; @@ -148,6 +148,22 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, } else { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_1: + switch (Modifier) { + default: llvm_unreachable("unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_ABS8; + break; + } + break; + case FK_Data_2: + switch (Modifier) { + default: llvm_unreachable("unsupported modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_ABS16; + break; + } + break; case FK_Data_4: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); @@ -184,6 +200,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, case MCSymbolRefExpr::VK_ARM_PREL31: Type = ELF::R_ARM_PREL31; break; + case MCSymbolRefExpr::VK_ARM_SBREL: + Type = ELF::R_ARM_SBREL32; + break; case MCSymbolRefExpr::VK_ARM_TLSLDO: Type = ELF::R_ARM_TLS_LDO32; break; diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 7b5d8b01dfe6..99b5c628f506 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -300,7 +300,19 @@ private: StringRef StringValue; static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) { - return (LHS.Tag < RHS.Tag); + // The conformance tag must be emitted first when serialised + // into an object file. 
Specifically, the addenda to the ARM ABI + // states that (2.3.7.4): + // + // "To simplify recognition by consumers in the common case of + // claiming conformity for the whole file, this tag should be + // emitted first in a file-scope sub-subsection of the first + // public subsection of the attributes section." + // + // So it is special-cased in this comparison predicate when the + // attributes are sorted in finishAttributeSection(). + return (RHS.Tag != ARMBuildAttrs::conformance) && + ((LHS.Tag == ARMBuildAttrs::conformance) || (LHS.Tag < RHS.Tag)); } }; @@ -541,6 +553,10 @@ public: /// necessary. void EmitValueImpl(const MCExpr *Value, unsigned Size, const SMLoc &Loc) override { + if (const MCSymbolRefExpr *SRE = dyn_cast_or_null<MCSymbolRefExpr>(Value)) + if (SRE->getKind() == MCSymbolRefExpr::VK_ARM_SBREL && !(Size == 4)) + getContext().FatalError(Loc, "relocated expression must be 32-bit"); + EmitDataMappingSymbol(); MCELFStreamer::EmitValueImpl(Value, Size); } @@ -848,6 +864,14 @@ void ARMTargetELFStreamer::emitFPUDefaultAttributes() { /* OverwriteExisting= */ false); break; + // FPV5_D16 is identical to FP_ARMV8 except for the number of D registers, so + // uses the FP_ARMV8_D16 build attribute. + case ARM::FPV5_D16: + setAttributeItem(ARMBuildAttrs::FP_arch, + ARMBuildAttrs::AllowFPARMv8B, + /* OverwriteExisting= */ false); + break; + case ARM::NEON: setAttributeItem(ARMBuildAttrs::FP_arch, ARMBuildAttrs::AllowFPv3A, @@ -971,12 +995,12 @@ void ARMTargetELFStreamer::finishAttributeSection() { Streamer.EmitULEB128IntValue(item.IntValue); break; case AttributeItem::TextAttribute: - Streamer.EmitBytes(item.StringValue.upper()); + Streamer.EmitBytes(item.StringValue); Streamer.EmitIntValue(0, 1); // '\0' break; case AttributeItem::NumericAndTextAttributes: Streamer.EmitULEB128IntValue(item.IntValue); - Streamer.EmitBytes(item.StringValue.upper()); + Streamer.EmitBytes(item.StringValue); Streamer.EmitIntValue(0, 1); // '\0' break; } @@ -1339,10 +1363,9 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) { return S; } - MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter, - bool RelaxAll, bool NoExecStack, - bool IsThumb) { +MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + bool RelaxAll, bool IsThumb) { ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); new ARMTargetELFStreamer(*S); // FIXME: This should eventually end up somewhere else where more @@ -1352,8 +1375,6 @@ MCStreamer *createARMNullStreamer(MCContext &Ctx) { if (RelaxAll) S->getAssembler().setRelaxAll(true); - if (NoExecStack) - S->getAssembler().setNoExecStack(true); return S; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h index bfd9e3364825..46ba57170db5 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h +++ b/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARM_ARMFIXUPKINDS_H -#define LLVM_ARM_ARMFIXUPKINDS_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMFIXUPKINDS_H #include "llvm/MC/MCFixup.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 7a19208cffc7..66a1618c370a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ 
b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -12,8 +12,8 @@ //===----------------------------------------------------------------------===// #include "ARMMCAsmInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/ADT/Triple.h" +#include "llvm/Support/CommandLine.h" using namespace llvm; @@ -55,7 +55,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) { Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; - HasLEB128 = true; SupportsDebugInformation = true; // Exceptions handling @@ -90,6 +89,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() { AlignmentIsInBytes = false; PrivateGlobalPrefix = "$M"; + PrivateLabelPrefix = "$M"; } void ARMCOFFMCAsmInfoGNU::anchor() { } @@ -102,8 +102,8 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { Code16Directive = ".code\t16"; Code32Directive = ".code\t32"; PrivateGlobalPrefix = ".L"; + PrivateLabelPrefix = ".L"; - HasLEB128 = true; SupportsDebugInformation = true; ExceptionsType = ExceptionHandling::None; UseParensForSymbolVariant = true; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h index 51cfa0adc1a9..6cb471537f6e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARMTARGETASMINFO_H -#define LLVM_ARMTARGETASMINFO_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCASMINFO_H #include "llvm/MC/MCAsmInfoCOFF.h" #include "llvm/MC/MCAsmInfoDarwin.h" @@ -21,7 +21,8 @@ namespace llvm { class ARMMCAsmInfoDarwin : public MCAsmInfoDarwin { - void anchor() override; + virtual void anchor(); + public: explicit ARMMCAsmInfoDarwin(StringRef TT); }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp index b8ee55574972..7320f4053f7f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp @@ -304,6 +304,28 @@ public: return Binary; } + unsigned getModImmOpValue(const MCInst &MI, unsigned Op, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &ST) const { + const MCOperand &MO = MI.getOperand(Op); + + // Support for fixups (MCFixup) + if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + // In the instruction this value is always encoded as the lowest 12 bits, + // so we don't have to perform any specific adjustments. + // Due to the requirements of relocatable records we have to use FK_Data_4. + // See ARMELFObjectWriter::ExplicitRelSym and + // ARMELFObjectWriter::GetRelocTypeInner for more details. + MCFixupKind Kind = MCFixupKind(FK_Data_4); + Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc())); + return 0; + } + + // The immediate is already in its encoded format + return MO.getImm(); + } + /// getT2SOImmOpValue - Return an encoded 12-bit shifted-immediate value.
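The getModImmOpValue hook above passes through an immediate that is already in the ARM modified-immediate format: an 8-bit value rotated right by twice the 4-bit rotate field. A standalone decoder for that 12-bit form, based on the architecture's description (decodeARMModImm is an illustrative name, not an LLVM function):

#include <cassert>
#include <cstdint>

// Expand an ARM modified immediate (rotate:imm8, 12 bits) to its 32-bit
// value: imm8 rotated right by 2 * rotate.
static uint32_t decodeARMModImm(uint32_t Imm12) {
  uint32_t Byte = Imm12 & 0xFF;
  uint32_t Rot = ((Imm12 >> 8) & 0xF) * 2;
  if (Rot == 0)
    return Byte; // avoid the undefined 32-bit shift below
  return (Byte >> Rot) | (Byte << (32 - Rot));
}

int main() {
  assert(decodeARMModImm(0x0FF) == 0x000000FF); // no rotation
  assert(decodeARMModImm(0x3FF) == 0xFC000003); // 0xFF ror 6
  return 0;
}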
unsigned getT2SOImmOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index e545e3c2f301..68d32b27fd7d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -35,12 +35,6 @@ void ARMMCExpr::PrintImpl(raw_ostream &OS) const { OS << ')'; } -bool -ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const { - return false; -} - void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const { Streamer.visitUsedExpr(*getSubExpr()); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index c5c0b10f8ad9..06bf6c97a4f1 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMMCEXPR_H -#define ARMMCEXPR_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCEXPR_H #include "llvm/MC/MCExpr.h" @@ -58,8 +58,11 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, - const MCAsmLayout *Layout) const override; - void visitUsedExpr(MCStreamer &Streamer) const override; + const MCAsmLayout *Layout, + const MCFixup *Fixup) const override { + return false; + } + void visitUsedExpr(MCStreamer &Streamer) const override; const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 6a3ec8fcc486..a6310e5093bc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -64,10 +64,60 @@ static bool getMCRDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, } static bool getITDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, - std::string &Info) { - if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && - MI.getOperand(1).isImm() && MI.getOperand(1).getImm() != 8) { - Info = "applying IT instruction to more than one subsequent instruction is deprecated"; + std::string &Info) { + if (STI.getFeatureBits() & llvm::ARM::HasV8Ops && MI.getOperand(1).isImm() && + MI.getOperand(1).getImm() != 8) { + Info = "applying IT instruction to more than one subsequent instruction is " + "deprecated"; + return true; + } + + return false; +} + +static bool getARMStoreDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, + std::string &Info) { + assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) && + "cannot predicate thumb instructions"); + + assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments"); + for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) { + assert(MI.getOperand(OI).isReg() && "expected register"); + if (MI.getOperand(OI).getReg() == ARM::SP || + MI.getOperand(OI).getReg() == ARM::PC) { + Info = "use of SP or PC in the list is deprecated"; + return true; + } + } + return false; +} + +static bool getARMLoadDeprecationInfo(MCInst &MI, MCSubtargetInfo &STI, + std::string &Info) { + assert((~STI.getFeatureBits() & llvm::ARM::ModeThumb) && + "cannot predicate thumb instructions"); + + assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments"); + bool ListContainsPC = false, ListContainsLR = false; + for (unsigned OI = 4, OE = MI.getNumOperands(); OI < OE; ++OI) { + assert(MI.getOperand(OI).isReg() && "expected 
register"); + switch (MI.getOperand(OI).getReg()) { + default: + break; + case ARM::LR: + ListContainsLR = true; + break; + case ARM::PC: + ListContainsPC = true; + break; + case ARM::SP: + Info = "use of SP in the list is deprecated"; + return true; + } + } + + if (ListContainsPC && ListContainsLR) { + Info = "use of LR and PC simultaneously in the list is deprecated"; return true; } @@ -90,6 +140,8 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) { bool NoCPU = CPU == "generic" || CPU.empty(); std::string ARMArchFeature; switch (triple.getSubArch()) { + default: + llvm_unreachable("invalid sub-architecture for ARM"); case Triple::ARMSubArch_v8: if (NoCPU) // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, @@ -215,31 +267,14 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) { Triple TheTriple(TT); MCAsmInfo *MAI; - switch (TheTriple.getOS()) { - case llvm::Triple::Darwin: - case llvm::Triple::IOS: - case llvm::Triple::MacOSX: + if (TheTriple.isOSDarwin() || TheTriple.isOSBinFormatMachO()) MAI = new ARMMCAsmInfoDarwin(TT); - break; - case llvm::Triple::Win32: - switch (TheTriple.getEnvironment()) { - case llvm::Triple::Itanium: - MAI = new ARMCOFFMCAsmInfoGNU(); - break; - case llvm::Triple::MSVC: - MAI = new ARMCOFFMCAsmInfoMicrosoft(); - break; - default: - llvm_unreachable("invalid environment"); - } - break; - default: - if (TheTriple.isOSBinFormatMachO()) - MAI = new ARMMCAsmInfoDarwin(TT); - else - MAI = new ARMELFMCAsmInfo(TT); - break; - } + else if (TheTriple.isWindowsItaniumEnvironment()) + MAI = new ARMCOFFMCAsmInfoGNU(); + else if (TheTriple.isWindowsMSVCEnvironment()) + MAI = new ARMCOFFMCAsmInfoMicrosoft(); + else + MAI = new ARMELFMCAsmInfo(TT); unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true); MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0)); @@ -263,11 +298,8 @@ static MCCodeGenInfo *createARMMCCodeGenInfo(StringRef TT, Reloc::Model RM, // This is duplicated code. Refactor this. static MCStreamer *createMCStreamer(const Target &T, StringRef TT, MCContext &Ctx, MCAsmBackend &MAB, - raw_ostream &OS, - MCCodeEmitter *Emitter, - const MCSubtargetInfo &STI, - bool RelaxAll, - bool NoExecStack) { + raw_ostream &OS, MCCodeEmitter *Emitter, + const MCSubtargetInfo &STI, bool RelaxAll) { Triple TheTriple(TT); switch (TheTriple.getObjectFormat()) { @@ -281,7 +313,7 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT, assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported"); return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS); case Triple::ELF: - return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack, + return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, TheTriple.getArch() == Triple::thumb); } } @@ -356,8 +388,10 @@ extern "C" void LLVMInitializeARMTargetMC() { // Register the MC codegen info. TargetRegistry::RegisterMCCodeGenInfo(TheARMLETarget, createARMMCCodeGenInfo); TargetRegistry::RegisterMCCodeGenInfo(TheARMBETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, createARMMCCodeGenInfo); - TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbLETarget, + createARMMCCodeGenInfo); + TargetRegistry::RegisterMCCodeGenInfo(TheThumbBETarget, + createARMMCCodeGenInfo); // Register the MC instruction info. 
TargetRegistry::RegisterMCInstrInfo(TheARMLETarget, createARMMCInstrInfo); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 5326e564f363..a6c20d5f94d6 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARMMCTARGETDESC_H -#define ARMMCTARGETDESC_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H #include "llvm/Support/DataTypes.h" #include <string> diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 186776a1944f..3187d36f7519 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -54,10 +54,10 @@ public: : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, /*UseAggressiveSymbolFolding=*/true) {} - void RecordRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue) override; + void RecordRelocation(MachObjectWriter *Writer, MCAssembler &Asm, + const MCAsmLayout &Layout, const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) override; }; } @@ -232,7 +232,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value2; - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(nullptr, Fragment->getParent(), MRE); } MachO::any_relocation_info MRE; @@ -243,7 +243,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value; - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(nullptr, Fragment->getParent(), MRE); } void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, @@ -297,7 +297,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value2; - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(nullptr, Fragment->getParent(), MRE); } MachO::any_relocation_info MRE; @@ -307,7 +307,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, (IsPCRel << 30) | MachO::R_SCATTERED); MRE.r_word1 = Value; - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(nullptr, Fragment->getParent(), MRE); } bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, @@ -351,11 +351,10 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, } void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, - const MCAssembler &Asm, + MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, - const MCFixup &Fixup, - MCValue Target, + const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); unsigned Log2Size; @@ -401,8 +400,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // See <reloc.h>. 
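The non-scattered path below packs the second word of a Mach-O relocation_info from the index, PC-relative flag, size, and type; with the old IsExtern bit (bit 27) removed, external-ness is now conveyed by handing the symbol to addRelocation instead of setting the bit. A standalone sketch of that packing (packRWord1 is an invented name for illustration):

#include <cassert>
#include <cstdint>

// Pack the ARM Mach-O relocation_info second word as the hunk below does:
// (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28).
static uint32_t packRWord1(uint32_t Index, bool IsPCRel, uint32_t Log2Size,
                           uint32_t Type) {
  assert(Index < (1u << 24) && Log2Size < 4 && Type < 16);
  return (Index << 0) | ((uint32_t)IsPCRel << 24) | (Log2Size << 25) |
         (Type << 28);
}

int main() {
  // Section ordinal 2, PC-relative, 4-byte field (log2 = 2), type 0.
  assert(packRWord1(2, true, 2, 0) == 0x05000002);
  return 0;
}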
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned Index = 0; - unsigned IsExtern = 0; unsigned Type = 0; + const MCSymbolData *RelSymbol = nullptr; if (Target.isAbsolute()) { // constant // FIXME! @@ -422,13 +421,12 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // Check whether we need an external or internal relocation. if (requiresExternRelocation(Writer, Asm, *Fragment, RelocType, SD, FixedValue)) { - IsExtern = 1; - Index = SD->getIndex(); + RelSymbol = SD; // For external relocations, make sure to offset the fixup value to // compensate for the addend of the symbol address, if it was // undefined. This occurs with weak definitions, for example. - if (!SD->Symbol->isUndefined()) + if (!SD->getSymbol().isUndefined()) FixedValue -= Layout.getSymbolOffset(SD); } else { // The index is the section ordinal (1-based). @@ -447,11 +445,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, // struct relocation_info (8 bytes) MachO::any_relocation_info MRE; MRE.r_word0 = FixupOffset; - MRE.r_word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); + MRE.r_word1 = + (Index << 0) | (IsPCRel << 24) | (Log2Size << 25) | (Type << 28); // Even when it's not a scattered relocation, movw/movt always uses // a PAIR relocation. @@ -476,10 +471,10 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, (Log2Size << 25) | (MachO::ARM_RELOC_PAIR << 28)); - Writer->addRelocation(Fragment->getParent(), MREPair); + Writer->addRelocation(nullptr, Fragment->getParent(), MREPair); } - Writer->addRelocation(Fragment->getParent(), MRE); + Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS, diff --git a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h index cd5875924300..e0c113ecfaa3 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h +++ b/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ARM_UNWIND_OP_ASM_H -#define ARM_UNWIND_OP_ASM_H +#ifndef LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H +#define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMUNWINDOPASM_H #include "llvm/ADT/SmallVector.h" #include "llvm/Support/ARMEHABI.h" @@ -90,4 +90,4 @@ private: } // namespace llvm -#endif // ARM_UNWIND_OP_ASM_H +#endif diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt index 2a7fe6188b50..db8fc925edb8 100644 --- a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMDesc parent = ARM -required_libraries = ARMAsmPrinter ARMInfo MC Support +required_libraries = ARMAsmPrinter ARMInfo MC MCDisassembler Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index f6d24e9e4f93..35fe9b3342d1 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -378,8 +378,8 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { - TII = static_cast<const ARMBaseInstrInfo*>(Fn.getTarget().getInstrInfo()); - TRI = Fn.getTarget().getRegisterInfo(); + TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); + TRI = Fn.getSubtarget().getRegisterInfo(); MRI 
= &Fn.getRegInfo(); const ARMSubtarget *STI = &Fn.getTarget().getSubtarget<ARMSubtarget>(); isLikeA9 = STI->isLikeA9() || STI->isSwift(); diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile index f069535ff3c0..c1601a3f29dd 100644 --- a/lib/Target/ARM/Makefile +++ b/lib/Target/ARM/Makefile @@ -15,7 +15,7 @@ TARGET = ARM BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \ ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \ ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \ - ARMGenCodeEmitter.inc ARMGenCallingConv.inc \ + ARMGenCallingConv.inc \ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \ ARMGenMCPseudoLowering.inc ARMGenDisassemblerTables.inc diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index baa97a7c479a..13f935877d54 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -52,9 +52,9 @@ void Thumb1FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); + const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); if (!hasReservedCallFrame(MF)) { // If we have alloca, convert as follows: // ADJCALLSTACKDOWN -> sub, sp, sp, amount @@ -89,12 +89,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); assert(NumBytes >= ArgRegsSaveSize && @@ -121,7 +124,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } if (!AFI->hasStackFrame()) { @@ -132,7 +136,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } return; } @@ -196,7 +201,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) 
+ .setMIFlags(MachineInstr::FrameSetup); } for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { @@ -223,7 +229,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); break; } } @@ -241,13 +248,15 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa( nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } else { unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister( nullptr, MRI->getDwarfRegNum(FramePtr, true))); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } if (NumBytes > 508) // If offset is > 508 then sp cannot be adjusted in a single instruction, @@ -264,7 +273,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { unsigned CFIIndex = MMI.addFrameInst( MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset)); BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex); + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); } } @@ -321,12 +331,15 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const Thumb1RegisterInfo *RegInfo = - static_cast<const Thumb1RegisterInfo*>(MF.getTarget().getRegisterInfo()); + const Thumb1RegisterInfo *RegInfo = static_cast<const Thumb1RegisterInfo *>( + MF.getSubtarget().getRegisterInfo()); const Thumb1InstrInfo &TII = - *static_cast<const Thumb1InstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const Thumb1InstrInfo *>(MF.getSubtarget().getInstrInfo()); - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned Align = MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->getStackAlignment(); unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); assert((unsigned)NumBytes >= ArgRegsSaveSize && @@ -382,28 +395,65 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, } } - if (ArgRegsSaveSize) { - // Unlike T2 and ARM mode, the T1 pop instruction cannot restore - // to LR, and we can't pop the value directly to the PC since - // we need to update the SP after popping the value. Therefore, we - // pop the old LR into R3 as a temporary. - + bool IsV4PopReturn = false; + for (const CalleeSavedInfo &CSI : MFI->getCalleeSavedInfo()) + if (CSI.getReg() == ARM::LR) + IsV4PopReturn = true; + IsV4PopReturn &= STI.hasV4TOps() && !STI.hasV5TOps(); + + // Unlike T2 and ARM mode, the T1 pop instruction cannot restore + // to LR, and we can't pop the value directly to the PC since + // we need to update the SP after popping the value. 
So instead + // we have to emit: + // POP {r3} + // ADD sp, #offset + // BX r3 + // If this would clobber a return value, then generate this sequence instead: + // MOV ip, r3 + // POP {r3} + // ADD sp, #offset + // MOV lr, r3 + // MOV r3, ip + // BX lr + if (ArgRegsSaveSize || IsV4PopReturn) { // Get the last instruction, tBX_RET MBBI = MBB.getLastNonDebugInstr(); assert (MBBI->getOpcode() == ARM::tBX_RET); - // Epilogue for vararg functions: pop LR to R3 and branch off it. - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) - .addReg(ARM::R3, RegState::Define); - - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); - - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) - .addReg(ARM::R3, RegState::Kill); - AddDefaultPred(MIB); - MIB.copyImplicitOps(&*MBBI); - // erase the old tBX_RET instruction - MBB.erase(MBBI); + DebugLoc dl = MBBI->getDebugLoc(); + + if (AFI->getReturnRegsCount() <= 3) { + // Epilogue: pop saved LR to R3 and branch off it. + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(ARM::R3, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) + .addReg(ARM::R3, RegState::Kill); + AddDefaultPred(MIB); + MIB.copyImplicitOps(&*MBBI); + // erase the old tBX_RET instruction + MBB.erase(MBBI); + } else { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::R12, RegState::Define) + .addReg(ARM::R3, RegState::Kill)); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) + .addReg(ARM::R3, RegState::Define); + + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::R3, RegState::Kill)); + + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::R3, RegState::Define) + .addReg(ARM::R12, RegState::Kill)); + // Keep the tBX_RET instruction + } } } @@ -417,7 +467,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, DebugLoc DL; MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -456,7 +506,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI->getDebugLoc(); @@ -470,6 +520,9 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // Special epilogue for vararg functions. 
See emitEpilogue if (isVarArg) continue; + // ARMv4T requires BX, see emitEpilogue + if (STI.hasV4TOps() && !STI.hasV5TOps()) + continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); MIB.copyImplicitOps(&*MI); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index a227f8ece73f..b785b2823dae 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H -#define LLVM_ARM_THUMB1FRAMELOWERING_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H +#define LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H #include "ARMFrameLowering.h" #include "Thumb1InstrInfo.h" diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 68cbb5c580df..8ea912e27039 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -41,10 +42,30 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc))); + // Need to check the arch. + MachineFunction &MF = *MBB.getParent(); + const ARMSubtarget &st = MF.getTarget().getSubtarget<ARMSubtarget>(); + assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && "Thumb1 can only copy GPR registers"); + + if (st.hasV6Ops() || ARM::hGPRRegClass.contains(SrcReg) + || !ARM::tGPRRegClass.contains(DestReg)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc))); + else { + // FIXME: The performance consequences of this are going to be atrocious. 
+ // Some things to try that should be better: + // * 'mov hi, $src; mov $dst, hi', with hi as either r10 or r11 + // * 'movs $dst, $src' if cpsr isn't live + // See: http://lists.cs.uiuc.edu/pipermail/llvmdev/2014-August/075998.html + + // 'MOV lo, lo' is unpredictable on < v6, so use the stack to do it + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPUSH))) + .addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tPOP))) + .addReg(DestReg, getDefRegState(true)); + } } void Thumb1InstrInfo:: @@ -101,3 +122,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } + +void +Thumb1InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const { + if (RM == Reloc::PIC_) + expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi, RM); + else + expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi, RM); +} diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index c5845b75e587..9fba76052a11 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB1INSTRUCTIONINFO_H -#define THUMB1INSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB1INSTRINFO_H #include "ARMBaseInstrInfo.h" #include "Thumb1RegisterInfo.h" @@ -54,7 +54,10 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const override; +private: + void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const override; }; } -#endif // THUMB1INSTRUCTIONINFO_H +#endif diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index f907b143ef1b..928c8e3c0098 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -66,8 +66,12 @@ Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, int Val, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { + assert((isARMLowRegister(DestReg) || + isVirtualRegister(DestReg)) && + "Thumb1 does not have ldr to high register"); + MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); @@ -106,15 +110,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, NumBytes = -NumBytes; } unsigned LdReg = DestReg; - if (DestReg == ARM::SP) { + if (DestReg == ARM::SP) assert(BaseReg == ARM::SP && "Unexpected!"); + if (!isARMLowRegister(DestReg) && !MRI.isVirtualRegister(DestReg)) LdReg = MF.getRegInfo().createVirtualRegister(&ARM::tGPRRegClass); - } - if (NumBytes <= 255 && NumBytes >= 0) + if (NumBytes <= 255 && NumBytes >= 0 && CanChangeCC) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) .addImm(NumBytes).setMIFlags(MIFlags); - else if (NumBytes < 0 && NumBytes >= -255) { + } else if (NumBytes < 0 && NumBytes >= -255 && CanChangeCC) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) .addImm(NumBytes).setMIFlags(MIFlags); AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) @@ -124,7 +128,8 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, ARMCC::AL, 0, MIFlags); // Emit add / sub. 
- int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); + int Opc = (isSub) ? ARM::tSUBrr : ((isHigh || !CanChangeCC) ? ARM::tADDhirr + : ARM::tADDrr); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (Opc != ARM::tADDhirr) @@ -136,32 +141,10 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, AddDefaultPred(MIB); } -/// calcNumMI - Returns the number of instructions required to materialize -/// the specific add / sub r, c instruction. -static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, - unsigned NumBits, unsigned Scale) { - unsigned NumMIs = 0; - unsigned Chunk = ((1 << NumBits) - 1) * Scale; - - if (Opc == ARM::tADDrSPi) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - NumMIs++; - NumBits = 8; - Scale = 1; // Followed by a number of tADDi8. - Chunk = ((1 << NumBits) - 1) * Scale; - } - - NumMIs += Bytes / Chunk; - if ((Bytes % Chunk) != 0) - NumMIs++; - if (ExtraOpc) - NumMIs++; - return NumMIs; -} - /// emitThumbRegPlusImmediate - Emits a series of instructions to materialize -/// a destreg = basereg + immediate in Thumb code. +/// a destreg = basereg + immediate in Thumb code. Tries a series of ADDs or +/// SUBs first, and uses a constant pool value if the instruction sequence would +/// be too long. This is allowed to modify the condition flags. void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, @@ -172,151 +155,145 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; - bool isMul4 = (Bytes & 3) == 0; - bool isTwoAddr = false; - bool DstNotEqBase = false; - unsigned NumBits = 1; - unsigned Scale = 1; - int Opc = 0; + + int CopyOpc = 0; + unsigned CopyBits = 0; + unsigned CopyScale = 1; + bool CopyNeedsCC = false; int ExtraOpc = 0; - bool NeedCC = false; - - if (DestReg == BaseReg && BaseReg == ARM::SP) { - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; - Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; - isTwoAddr = true; - } else if (!isSub && BaseReg == ARM::SP) { - // r1 = add sp, 403 - // => - // r1 = add sp, 100 * 4 - // r1 = add r1, 3 - if (!isMul4) { - Bytes &= ~3; - ExtraOpc = ARM::tADDi3; + unsigned ExtraBits = 0; + unsigned ExtraScale = 1; + bool ExtraNeedsCC = false; + + // Strategy: + // We need to select two types of instruction, maximizing the available + // immediate range of each. The instructions we use will depend on whether + // DestReg and BaseReg are low, high or the stack pointer. + // * CopyOpc - DestReg = BaseReg + imm + // This will be emitted once if DestReg != BaseReg, and never if + // DestReg == BaseReg. + // * ExtraOpc - DestReg = DestReg + imm + // This will be emitted as many times as necessary to add the + // full immediate. + // If the immediate ranges of these instructions are not large enough to cover + // NumBytes with a reasonable number of instructions, we fall back to using a + // value loaded from a constant pool. + if (DestReg == ARM::SP) { + if (BaseReg == ARM::SP) { + // sp -> sp + // Already in right reg, no copy needed + } else { + // low -> sp or high -> sp + CopyOpc = ARM::tMOVr; + CopyBits = 0; } - NumBits = 8; - Scale = 4; - Opc = ARM::tADDrSPi; - } else { - // sp = sub sp, c - // r1 = sub sp, c - // r8 = sub sp, c - if (DestReg != BaseReg) - DstNotEqBase = true; - NumBits = 8; - if (DestReg == ARM::SP) { - Opc = isSub ? 
ARM::tSUBspi : ARM::tADDspi; - assert(isMul4 && "Thumb sp inc / dec size must be multiple of 4!"); - NumBits = 7; - Scale = 4; + ExtraOpc = isSub ? ARM::tSUBspi : ARM::tADDspi; + ExtraBits = 7; + ExtraScale = 4; + } else if (isARMLowRegister(DestReg)) { + if (BaseReg == ARM::SP) { + // sp -> low + assert(!isSub && "Thumb1 does not have tSUBrSPi"); + CopyOpc = ARM::tADDrSPi; + CopyBits = 8; + CopyScale = 4; + } else if (DestReg == BaseReg) { + // low -> same low + // Already in right reg, no copy needed + } else if (isARMLowRegister(BaseReg)) { + // low -> different low + CopyOpc = isSub ? ARM::tSUBi3 : ARM::tADDi3; + CopyBits = 3; + CopyNeedsCC = true; } else { - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - NumBits = 8; - NeedCC = true; + // high -> low + CopyOpc = ARM::tMOVr; + CopyBits = 0; } - isTwoAddr = true; + ExtraOpc = isSub ? ARM::tSUBi8 : ARM::tADDi8; + ExtraBits = 8; + ExtraNeedsCC = true; + } else /* DestReg is high */ { + if (DestReg == BaseReg) { + // high -> same high + // Already in right reg, no copy needed + } else { + // {low,high,sp} -> high + CopyOpc = ARM::tMOVr; + CopyBits = 0; + } + ExtraOpc = 0; } - unsigned NumMIs = calcNumMI(Opc, ExtraOpc, Bytes, NumBits, Scale); + // We could handle an unaligned immediate with an unaligned copy instruction + // and an aligned extra instruction, but this case is not currently needed. + assert(((Bytes & 3) == 0 || ExtraScale == 1) && + "Unaligned offset, but all instructions require alignment"); + + unsigned CopyRange = ((1 << CopyBits) - 1) * CopyScale; + // If we would emit the copy with an immediate of 0, just use tMOVr. + if (CopyOpc && Bytes < CopyScale) { + CopyOpc = ARM::tMOVr; + CopyScale = 1; + CopyNeedsCC = false; + CopyRange = 0; + } + unsigned ExtraRange = ((1 << ExtraBits) - 1) * ExtraScale; // per instruction + unsigned RequiredCopyInstrs = CopyOpc ? 1 : 0; + unsigned RangeAfterCopy = (CopyRange > Bytes) ? 0 : (Bytes - CopyRange); + + // We could handle this case when the copy instruction does not require an + // aligned immediate, but we do not currently do this. + assert(RangeAfterCopy % ExtraScale == 0 && + "Extra instruction requires immediate to be aligned"); + + unsigned RequiredExtraInstrs; + if (ExtraRange) + RequiredExtraInstrs = RoundUpToAlignment(RangeAfterCopy, ExtraRange) / ExtraRange; + else if (RangeAfterCopy > 0) + // We need an extra instruction but none is available + RequiredExtraInstrs = 1000000; + else + RequiredExtraInstrs = 0; + unsigned RequiredInstrs = RequiredCopyInstrs + RequiredExtraInstrs; unsigned Threshold = (DestReg == ARM::SP) ? 3 : 2; - if (NumMIs > Threshold) { - // This will expand into too many instructions. Load the immediate from a - // constpool entry. + + // Use a constant pool, if the sequence of ADDs/SUBs is too expensive. + if (RequiredInstrs > Threshold) { emitThumbRegPlusImmInReg(MBB, MBBI, dl, DestReg, BaseReg, NumBytes, true, TII, MRI, MIFlags); return; } - if (DstNotEqBase) { - if (isARMLowRegister(DestReg) && isARMLowRegister(BaseReg)) { - // If both are low registers, emit DestReg = add BaseReg, max(Imm, 7) - unsigned Chunk = (1 << 3) - 1; - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - const MCInstrDesc &MCID = TII.get(isSub ? 
ARM::tSUBi3 : ARM::tADDi3); - const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg) - .setMIFlags(MIFlags)); - AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); - } else { - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill)) - .setMIFlags(MIFlags); + // Emit zero or one copy instructions + if (CopyOpc) { + unsigned CopyImm = std::min(Bytes, CopyRange) / CopyScale; + Bytes -= CopyImm * CopyScale; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(CopyOpc), DestReg); + if (CopyNeedsCC) + MIB = AddDefaultT1CC(MIB); + MIB.addReg(BaseReg, RegState::Kill); + if (CopyOpc != ARM::tMOVr) { + MIB.addImm(CopyImm); } + AddDefaultPred(MIB.setMIFlags(MIFlags)); + BaseReg = DestReg; } - unsigned Chunk = ((1 << NumBits) - 1) * Scale; + // Emit zero or more in-place add/sub instructions while (Bytes) { - unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes; - Bytes -= ThisVal; - ThisVal /= Scale; - // Build the new tADD / tSUB. - if (isTwoAddr) { - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); - if (NeedCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(DestReg).addImm(ThisVal); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); - } else { - bool isKill = BaseReg != ARM::SP; - MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); - if (NeedCC) - MIB = AddDefaultT1CC(MIB); - MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); - MIB = AddDefaultPred(MIB); - MIB.setMIFlags(MIFlags); - - BaseReg = DestReg; - if (Opc == ARM::tADDrSPi) { - // r4 = add sp, imm - // r4 = add r4, imm - // ... - NumBits = 8; - Scale = 1; - Chunk = ((1 << NumBits) - 1) * Scale; - Opc = isSub ? ARM::tSUBi8 : ARM::tADDi8; - NeedCC = isTwoAddr = true; - } - } - } - - if (ExtraOpc) { - const MCInstrDesc &MCID = TII.get(ExtraOpc); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) - .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3) - .setMIFlags(MIFlags)); - } -} + unsigned ExtraImm = std::min(Bytes, ExtraRange) / ExtraScale; + Bytes -= ExtraImm * ExtraScale; -/// emitThumbConstant - Emit a series of instructions to materialize a -/// constant. -static void emitThumbConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - unsigned DestReg, int Imm, - const TargetInstrInfo &TII, - const Thumb1RegisterInfo& MRI, - DebugLoc dl) { - bool isSub = Imm < 0; - if (isSub) Imm = -Imm; - - int Chunk = (1 << 8) - 1; - int ThisVal = (Imm > Chunk) ? Chunk : Imm; - Imm -= ThisVal; - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), - DestReg)) - .addImm(ThisVal)); - if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI); - if (isSub) { - const MCInstrDesc &MCID = TII.get(ARM::tRSB); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg)) - .addReg(DestReg, RegState::Kill)); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ExtraOpc), DestReg); + if (ExtraNeedsCC) + MIB = AddDefaultT1CC(MIB); + MIB.addReg(BaseReg).addImm(ExtraImm); + MIB = AddDefaultPred(MIB); + MIB.setMIFlags(MIFlags); } } @@ -352,86 +329,13 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, const MCInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); - if (Opcode == ARM::tADDrSPi) { + if (Opcode == ARM::tADDframe) { Offset += MI.getOperand(FrameRegIdx+1).getImm(); - - // Can't use tADDrSPi if it's based off the frame pointer. 
- unsigned NumBits = 0; - unsigned Scale = 1; - if (FrameReg != ARM::SP) { - Opcode = ARM::tADDi3; - NumBits = 3; - } else { - NumBits = 8; - Scale = 4; - assert((Offset & 3) == 0 && - "Thumb add/sub sp, #imm immediate must be multiple of 4!"); - } - - unsigned PredReg; - if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) { - // Turn it into a move. - MI.setDesc(TII.get(ARM::tMOVr)); - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - // Remove offset - MI.RemoveOperand(FrameRegIdx+1); - return true; - } - - // Common case: small offset, fits into instruction. - unsigned Mask = (1 << NumBits) - 1; - if (((Offset / Scale) & ~Mask) == 0) { - // Replace the FrameIndex with sp / fp - if (Opcode == ARM::tADDi3) { - MI.setDesc(TII.get(Opcode)); - removeOperands(MI, FrameRegIdx); - AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg) - .addImm(Offset / Scale)); - } else { - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale); - } - return true; - } unsigned DestReg = MI.getOperand(0).getReg(); - unsigned Bytes = (Offset > 0) ? Offset : -Offset; - unsigned NumMIs = calcNumMI(Opcode, 0, Bytes, NumBits, Scale); - // MI would expand into a large number of instructions. Don't try to - // simplify the immediate. - if (NumMIs > 2) { - emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, - *this); - MBB.erase(II); - return true; - } - if (Offset > 0) { - // Translate r0 = add sp, imm to - // r0 = add sp, 255*4 - // r0 = add r0, (imm - 255*4) - if (Opcode == ARM::tADDi3) { - MI.setDesc(TII.get(Opcode)); - removeOperands(MI, FrameRegIdx); - AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask)); - } else { - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); - MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask); - } - Offset = (Offset - Mask * Scale); - MachineBasicBlock::iterator NII = std::next(II); - emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII, - *this); - } else { - // Translate r0 = add sp, -imm to - // r0 = -imm (this is then translated into a series of instructions) - // r0 = add r0, sp - emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl); - - MI.setDesc(TII.get(ARM::tADDhirr)); - MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false); - } + emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, + *this); + MBB.erase(II); return true; } else { if (AddrMode != ARMII::AddrModeT1_s) @@ -485,8 +389,11 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, void Thumb1RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, int64_t Offset) const { const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>( - MI.getParent()->getParent()->getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MI.getParent() + ->getParent() + ->getTarget() + .getSubtargetImpl() + ->getInstrInfo()); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -512,7 +419,7 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // off the frame pointer (if, for example, there are alloca() calls in // the function, the offset will be negative). Use R12 instead since that's // a call-clobbered register that we know won't be used in Thumb1 mode. 
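Before the body of saveScavengerRegister continues below, the R12 trick described in that comment can be made concrete. Here is a small self-contained sketch that just prints the save/restore pair emitted around the scavenged register's live range; the register choice and the printing helper are illustrative only, not LLVM code:

#include <cstdio>

// Prints the Thumb1 spill/reload pair: a hi-register MOV needs no memory
// offset at all, so it works even when the emergency spill slot would sit
// at an unencodable (for example negative) offset from the frame pointer.
void printScavengerSaveRestore(const char *scavengedReg) {
  std::printf("    mov r12, %s    @ park the value in call-clobbered r12\n",
              scavengedReg);
  std::printf("    ...            @ %s is free for the scavenger here\n",
              scavengedReg);
  std::printf("    mov %s, r12    @ restore it afterwards\n", scavengedReg);
}

int main() {
  printScavengerSaveRestore("r3");
  return 0;
}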
- const TargetInstrInfo &TII = *MBB.getParent()->getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo(); DebugLoc DL; AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr)) .addReg(ARM::R12, RegState::Define) @@ -559,7 +466,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); const ARMBaseInstrInfo &TII = - *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); + *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); DebugLoc dl = MI.getDebugLoc(); MachineInstrBuilder MIB(*MBB.getParent(), &MI); @@ -570,7 +477,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MF.getFrameInfo()->getStackSize() + SPAdj; if (MF.getFrameInfo()->hasVarSizedObjects()) { - assert(SPAdj == 0 && MF.getTarget().getFrameLowering()->hasFP(MF) && + assert(SPAdj == 0 && MF.getSubtarget().getFrameLowering()->hasFP(MF) && "Unexpected"); // There are alloca()'s in this function, must reference off the frame // pointer or base pointer instead. @@ -587,7 +494,10 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // when !hasReservedCallFrame(). #ifndef NDEBUG if (RS && FrameReg == ARM::SP && RS->isScavengingFrameIndex(FrameIndex)){ - assert(MF.getTarget().getFrameLowering()->hasReservedCallFrame(MF) && + assert(MF.getTarget() + .getSubtargetImpl() + ->getFrameLowering() + ->hasReservedCallFrame(MF) && "Cannot use SP to access the emergency spill slot in " "functions without a reserved call frame"); assert(!MF.getFrameInfo()->hasVarSizedObjects() && diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 0c0abbe99042..5feaf525396e 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB1REGISTERINFO_H -#define THUMB1REGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H #include "ARMBaseRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -60,4 +60,4 @@ public: }; } -#endif // THUMB1REGISTERINFO_H +#endif diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index edb9ff3a24f2..fdcb522a9144 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -188,7 +188,7 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { true/*isImp*/, false/*isKill*/)); MachineInstr *LastITMI = MI; - MachineBasicBlock::iterator InsertPos = MIB; + MachineBasicBlock::iterator InsertPos = MIB.getInstr(); ++MBBI; // Form IT block. 
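As background for the IT-block hunk that follows: a single Thumb2 IT instruction predicates up to four subsequent instructions, and each mnemonic suffix letter records whether the corresponding instruction uses the first condition ('T') or its inverse ('E'). The sketch below is a string-level model only; the real pass works on MachineInstr operands, and under the ARMv8 restrictIT mode queried in the next hunk an IT block is limited to a single instruction:

#include <cassert>
#include <string>
#include <vector>

// Builds the assembler mnemonic for an IT block. pattern[i] is true when
// instruction i+2 of the block shares the first instruction's condition.
std::string itMnemonic(const std::string &firstCond,
                       const std::vector<bool> &pattern) {
  assert(pattern.size() <= 3 && "an IT block covers at most 4 instructions");
  std::string mn = "IT";
  for (bool sameCond : pattern)
    mn += sameCond ? 'T' : 'E';
  return mn + " " + firstCond;
}

int main() {
  // A block of eq, eq, ne instructions is introduced by "ITTE eq".
  assert(itMnemonic("eq", {true, false}) == "ITTE eq");
  return 0;
}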
@@ -255,8 +255,9 @@ bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) { bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) { const TargetMachine &TM = Fn.getTarget(); AFI = Fn.getInfo<ARMFunctionInfo>(); - TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); - TRI = TM.getRegisterInfo(); + TII = static_cast<const Thumb2InstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); + TRI = TM.getSubtargetImpl()->getRegisterInfo(); restrictIT = TM.getSubtarget<ARMSubtarget>().restrictIT(); if (!AFI->isThumbFunction()) diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index a9df006fb8c1..91973e1c463e 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -209,6 +209,15 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI); } +void +Thumb2InstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const { + if (RM == Reloc::PIC_) + expandLoadStackGuardBase(MI, ARM::t2MOV_ga_pcrel, ARM::t2LDRi12, RM); + else + expandLoadStackGuardBase(MI, ARM::t2MOVi32imm, ARM::t2LDRi12, RM); +} + void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 34d45d307ff4..46a1f6d600a7 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB2INSTRUCTIONINFO_H -#define THUMB2INSTRUCTIONINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB2INSTRINFO_H #include "ARMBaseInstrInfo.h" #include "Thumb2RegisterInfo.h" @@ -61,6 +61,10 @@ public: /// always be able to get register info as well (through this method). /// const Thumb2RegisterInfo &getRegisterInfo() const override { return RI; } + +private: + void expandLoadStackGuard(MachineBasicBlock::iterator MI, + Reloc::Model RM) const override; }; /// getITInstrPredicate - Valid only in Thumb2 mode. 
This function is identical @@ -71,4 +75,4 @@ ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg); } -#endif // THUMB2INSTRUCTIONINFO_H +#endif diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 782d81fd424d..0d5d85a00614 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -40,7 +40,7 @@ Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index 8a33e6cea919..1dd94cc5027d 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef THUMB2REGISTERINFO_H -#define THUMB2REGISTERINFO_H +#ifndef LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H +#define LLVM_LIB_TARGET_ARM_THUMB2REGISTERINFO_H #include "ARMBaseRegisterInfo.h" @@ -35,4 +35,4 @@ public: }; } -#endif // THUMB2REGISTERINFO_H +#endif diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 09debe76f1b6..c51eb8bedb95 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -335,7 +335,7 @@ static bool VerifyLowRegs(MachineInstr *MI) { bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA || Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD || Opc == ARM::t2LDMDB_UPD); - bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD); + bool isLROk = (Opc == ARM::t2STMDB_UPD); bool isSPOk = isPCOk || isLROk; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); @@ -384,7 +384,6 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, if (MI->getOperand(1).getReg() == ARM::SP) { Opc = Entry.NarrowOpc2; ImmLimit = Entry.Imm2Limit; - HasOffReg = false; } Scale = 4; @@ -1003,7 +1002,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); - TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo()); + TII = static_cast<const Thumb2InstrInfo *>( + TM.getSubtargetImpl()->getInstrInfo()); STI = &TM.getSubtarget<ARMSubtarget>(); // Optimizing / minimizing size?
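To close, the copy-plus-extra strategy documented in the rewritten emitThumbRegPlusImmediate can be made concrete with a worked example. The standalone sketch below models only the sp-to-low-register, positive-offset case: tADDrSPi carries an 8-bit immediate scaled by 4, tADDi8 an unscaled 8-bit immediate, and a low destination register allows at most two instructions before the constant-pool fallback kicks in. The function name and the -1 sentinel are hypothetical, not LLVM's:

#include <algorithm>
#include <cstdio>

// Counts the instructions needed for 'low_reg = sp + bytes': one tADDrSPi
// covering the word-aligned part, then as many tADDi8 as it takes for the
// remainder. Returns -1 when the 2-instruction threshold is exceeded and a
// constant-pool load would be emitted instead.
int thumb1AddSpImmCost(unsigned bytes) {
  const unsigned copyRange = 255 * 4; // tADDrSPi: 8-bit immediate, scale 4
  const unsigned extraRange = 255;    // tADDi8: 8-bit immediate, scale 1
  unsigned copyImm = std::min(bytes, copyRange) / 4 * 4;
  unsigned rest = bytes - copyImm;
  unsigned extras = (rest + extraRange - 1) / extraRange;
  unsigned instrs = 1 + extras;
  return instrs <= 2 ? static_cast<int>(instrs) : -1;
}

int main() {
  // r1 = sp + 403 becomes 'add r1, sp, #400; adds r1, #3': 2 instructions.
  std::printf("sp+403 -> %d instructions\n", thumb1AddSpImmCost(403));
  // sp + 2048 would need too many adds, so expect -1 (constant pool).
  std::printf("sp+2048 -> %d\n", thumb1AddSpImmCost(2048));
  return 0;
}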