Diffstat (limited to 'lib/Target/ARM')
87 files changed, 3854 insertions, 2567 deletions
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 8640c873f4413..16d5f74d19e3f 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -34,10 +34,10 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <map> #include <set> @@ -189,8 +189,7 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { // MI is already known to be dead. We need to see // if other instructions can also be removed. - for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { - MachineOperand &MO = MI->getOperand(i); + for (MachineOperand &MO : MI->operands()) { if ((!MO.isReg()) || (!MO.isUse())) continue; unsigned Reg = MO.getReg(); @@ -212,8 +211,7 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { // dead. If so, we can also mark this instruction as being // dead. bool IsDead = true; - for (unsigned int j = 0; j < Def->getNumOperands(); ++j) { - MachineOperand &MODef = Def->getOperand(j); + for (MachineOperand &MODef : Def->operands()) { if ((!MODef.isReg()) || (!MODef.isDef())) continue; unsigned DefReg = MODef.getReg(); @@ -221,13 +219,11 @@ void A15SDOptimizer::eraseInstrWithNoUses(MachineInstr *MI) { IsDead = false; break; } - for (MachineRegisterInfo::use_instr_iterator - II = MRI->use_instr_begin(Reg), EE = MRI->use_instr_end(); - II != EE; ++II) { + for (MachineInstr &Use : MRI->use_instructions(Reg)) { // We don't care about self references. - if (&*II == Def) + if (&Use == Def) continue; - if (DeadInstr.find(&*II) == DeadInstr.end()) { + if (DeadInstr.find(&Use) == DeadInstr.end()) { IsDead = false; break; } @@ -277,7 +273,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { MRI->getRegClass(MI->getOperand(1).getReg()); if (TRC->hasSuperClassEq(MRI->getRegClass(FullReg))) { DEBUG(dbgs() << "Subreg copy is compatible - returning "); - DEBUG(dbgs() << PrintReg(FullReg) << "\n"); + DEBUG(dbgs() << printReg(FullReg) << "\n"); eraseInstrWithNoUses(MI); return FullReg; } @@ -405,9 +401,7 @@ SmallVector<unsigned, 8> A15SDOptimizer::getReadDPRs(MachineInstr *MI) { return SmallVector<unsigned, 8>(); SmallVector<unsigned, 8> Defs; - for (unsigned i = 0; i < MI->getNumOperands(); ++i) { - MachineOperand &MO = MI->getOperand(i); - + for (MachineOperand &MO : MI->operands()) { if (!MO.isReg() || !MO.isUse()) continue; if (!usesRegClass(MO, &ARM::DPRRegClass) && @@ -617,10 +611,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { elideCopiesAndPHIs(Def, DefSrcs); - for (SmallVectorImpl<MachineInstr *>::iterator II = DefSrcs.begin(), - EE = DefSrcs.end(); II != EE; ++II) { - MachineInstr *MI = *II; - + for (MachineInstr *MI : DefSrcs) { // If we've already analyzed and replaced this operand, don't do // anything. 
if (Replacements.find(MI) != Replacements.end()) @@ -653,7 +644,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { DEBUG(dbgs() << "Replacing operand " << **I << " with " - << PrintReg(NewReg) << "\n"); + << printReg(NewReg) << "\n"); (*I)->substVirtReg(NewReg, 0, *TRI); } } @@ -664,7 +655,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { } bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); @@ -682,20 +673,14 @@ bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) { DeadInstr.clear(); Replacements.clear(); - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - - for (MachineBasicBlock::iterator MI = MFI->begin(), ME = MFI->end(); - MI != ME;) { - Modified |= runOnInstruction(&*MI++); + for (MachineBasicBlock &MBB : Fn) { + for (MachineInstr &MI : MBB) { + Modified |= runOnInstruction(&MI); } - } - for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(), - E = DeadInstr.end(); - I != E; ++I) { - (*I)->eraseFromParent(); + for (MachineInstr *MI : DeadInstr) { + MI->eraseFromParent(); } return Modified; diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 4676226acd9c8..3aac689c63104 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -60,6 +60,7 @@ std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF); void initializeARMLoadStoreOptPass(PassRegistry &); void initializeARMPreAllocLoadStoreOptPass(PassRegistry &); void initializeARMConstantIslandsPass(PassRegistry &); +void initializeARMExpandPseudoPass(PassRegistry &); } // end namespace llvm diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index e49c1babac210..c1a3f639461d1 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -114,6 +114,9 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", "Enable support for CRC instructions">; +def FeatureDotProd : SubtargetFeature<"dotprod", "HasDotProd", "true", + "Enable support for dot product instructions", + [FeatureNEON]>; // Not to be confused with FeatureHasRetAddrStack (return address stack) def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true", @@ -129,6 +132,10 @@ def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true", def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true", "CPU fuses AES crypto operations">; +// The way of reading thread pointer +def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true", + "Reading thread pointer from register">; + // Cyclone can zero VFP registers in 0 cycles. def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true", "Has zero-cycle zeroing instructions">; @@ -312,6 +319,13 @@ def FeatureNoNegativeImmediates "equivalent when the immediate does " "not fit in the encoding.">; +// Use the MachineScheduler for instruction scheduling for the subtarget. 
+def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true", + "Use the MachineScheduler">; + +def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler", + "DisablePostRAScheduler", "true", + "Don't schedule again after register allocation">; //===----------------------------------------------------------------------===// // ARM architecture class @@ -334,9 +348,7 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", - "Does not support ARM mode execution", - [ModeThumb]>; - + "Does not support ARM mode execution">; //===----------------------------------------------------------------------===// // ARM ISAa. @@ -396,6 +408,9 @@ def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true", "Support ARM v8.2a instructions", [HasV8_1aOps]>; +def HasV8_3aOps : SubtargetFeature<"v8.3a", "HasV8_3aOps", "true", + "Support ARM v8.3a instructions", + [HasV8_2aOps]>; //===----------------------------------------------------------------------===// // ARM Processor subtarget features. @@ -421,12 +436,16 @@ def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35", "Cortex-A35 ARM processors", []>; def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", "Cortex-A53 ARM processors", []>; +def ProcA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", + "Cortex-A55 ARM processors", []>; def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", "Cortex-A57 ARM processors", []>; def ProcA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", "Cortex-A72 ARM processors", []>; def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", "Cortex-A73 ARM processors", []>; +def ProcA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", + "Cortex-A75 ARM processors", []>; def ProcKrait : SubtargetFeature<"krait", "ARMProcFamily", "Krait", "Qualcomm Krait processors", []>; @@ -436,7 +455,7 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", "Swift ARM processors", []>; def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1", - "Samsung Exynos-M1 processors", []>; + "Samsung Exynos-Mx processors", []>; def ProcR4 : SubtargetFeature<"r4", "ARMProcFamily", "CortexR4", "Cortex-R4 ARM processors", []>; @@ -497,11 +516,13 @@ def ARMv6kz : Architecture<"armv6kz", "ARMv6kz", [HasV6KOps, def ARMv6m : Architecture<"armv6-m", "ARMv6m", [HasV6MOps, FeatureNoARM, + ModeThumb, FeatureDB, FeatureMClass]>; def ARMv6sm : Architecture<"armv6s-m", "ARMv6sm", [HasV6MOps, FeatureNoARM, + ModeThumb, FeatureDB, FeatureMClass]>; @@ -529,6 +550,7 @@ def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops, def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, FeatureThumb2, FeatureNoARM, + ModeThumb, FeatureDB, FeatureHWDivThumb, FeatureMClass]>; @@ -536,6 +558,7 @@ def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops, def ARMv7em : Architecture<"armv7e-m", "ARMv7em", [HasV7Ops, FeatureThumb2, FeatureNoARM, + ModeThumb, FeatureDB, FeatureHWDivThumb, FeatureMClass, @@ -578,6 +601,19 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps, FeatureCRC, FeatureRAS]>; +def ARMv83a : Architecture<"armv8.3-a", "ARMv83a", [HasV8_3aOps, + FeatureAClass, + FeatureDB, + FeatureFPARMv8, + FeatureNEON, + FeatureDSP, + FeatureTrustZone, + FeatureMP, + FeatureVirtualization, + FeatureCrypto, + FeatureCRC, + FeatureRAS]>; + def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, FeatureRClass, 
FeatureDB, @@ -591,6 +627,7 @@ def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops, def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", [HasV8MBaselineOps, FeatureNoARM, + ModeThumb, FeatureDB, FeatureHWDivThumb, FeatureV7Clrex, @@ -601,6 +638,7 @@ def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline", def ARMv8mMainline : Architecture<"armv8-m.main", "ARMv8mMainline", [HasV8MMainlineOps, FeatureNoARM, + ModeThumb, FeatureDB, FeatureHWDivThumb, Feature8MSecExt, @@ -791,7 +829,9 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift, FeatureSlowOddRegister, FeatureSlowLoadDSubreg, FeatureSlowVGETLNi32, - FeatureSlowVDUP32]>; + FeatureSlowVDUP32, + FeatureUseMISched, + FeatureNoPostRASched]>; def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4, FeatureHasRetAddrStack, @@ -883,6 +923,11 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53, FeatureCRC, FeatureFPAO]>; +def : ProcNoItin<"cortex-a55", [ARMv82a, ProcA55, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureDotProd]>; + def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57, FeatureHWDivThumb, FeatureHWDivARM, @@ -904,6 +949,11 @@ def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73, FeatureCrypto, FeatureCRC]>; +def : ProcNoItin<"cortex-a75", [ARMv82a, ProcA75, + FeatureHWDivThumb, + FeatureHWDivARM, + FeatureDotProd]>; + def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureHasRetAddrStack, FeatureNEONForFP, @@ -915,7 +965,9 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift, FeatureAvoidMOVsShOp, FeatureHasSlowFPVMLx, FeatureCrypto, - FeatureZCZeroing]>; + FeatureUseMISched, + FeatureZCZeroing, + FeatureNoPostRASched]>; def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1, FeatureHWDivThumb, @@ -970,6 +1022,10 @@ def ARMAsmWriter : AsmWriter { bit isMCAsmWriter = 1; } +def ARMAsmParser : AsmParser { + bit ReportMultipleNearMisses = 1; +} + def ARMAsmParserVariant : AsmParserVariant { int Variant = 0; string Name = "ARM"; @@ -980,5 +1036,6 @@ def ARM : Target { // Pull in Instruction Info. 
let InstructionSet = ARMInstrInfo; let AssemblyWriters = [ARMAsmWriter]; + let AssemblyParsers = [ARMAsmParser]; let AssemblyParserVariants = [ARMAsmParserVariant]; } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index b24d3420d1d96..d3d79fe975bbd 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -24,13 +24,11 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/BinaryFormat/COFF.h" -#include "llvm/BinaryFormat/ELF.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" @@ -41,7 +39,6 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCObjectStreamer.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ARMBuildAttributes.h" @@ -51,7 +48,6 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include <cctype> using namespace llvm; #define DEBUG_TYPE "asm-printer" @@ -113,7 +109,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget<ARMSubtarget>(); SetupMachineFunction(MF); - const Function* F = MF.getFunction(); + const Function &F = MF.getFunction(); const TargetMachine& TM = MF.getTarget(); // Collect all globals that had their storage promoted to a constant pool. @@ -124,13 +120,13 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Calculate this function's optimization goal. unsigned OptimizationGoal; - if (F->hasFnAttribute(Attribute::OptimizeNone)) + if (F.hasFnAttribute(Attribute::OptimizeNone)) // For best debugging illusion, speed and small size sacrificed OptimizationGoal = 6; - else if (F->optForMinSize()) + else if (F.optForMinSize()) // Aggressively for small size, speed and debug illusion sacrificed OptimizationGoal = 4; - else if (F->optForSize()) + else if (F.optForSize()) // For small size, but speed and debugging illusion preserved OptimizationGoal = 3; else if (TM.getOptLevel() == CodeGenOpt::Aggressive) @@ -150,7 +146,7 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { OptimizationGoals = 0; if (Subtarget->isTargetCOFF()) { - bool Internal = F->hasInternalLinkage(); + bool Internal = F.hasInternalLinkage(); COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL; int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; @@ -173,10 +169,10 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { if (! ThumbIndirectPads.empty()) { OutStreamer->EmitAssemblerFlag(MCAF_Code16); EmitAlignment(1); - for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) { - OutStreamer->EmitLabel(ThumbIndirectPads[i].second); + for (std::pair<unsigned, MCSymbol *> &TIP : ThumbIndirectPads) { + OutStreamer->EmitLabel(TIP.second); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX) - .addReg(ThumbIndirectPads[i].first) + .addReg(TIP.first) // Add predicate operands. 
.addImm(ARMCC::AL) .addReg(0)); @@ -476,11 +472,7 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { // Use the triple's architecture and subarchitecture to determine // if we're thumb for the purposes of the top level code16 assembler // flag. - bool isThumb = TT.getArch() == Triple::thumb || - TT.getArch() == Triple::thumbeb || - TT.getSubArch() == Triple::ARMSubArch_v7m || - TT.getSubArch() == Triple::ARMSubArch_v6m; - if (!M.getModuleInlineAsm().empty() && isThumb) + if (!M.getModuleInlineAsm().empty() && TT.isThumb()) OutStreamer->EmitAssemblerFlag(MCAF_Code16); } @@ -869,11 +861,12 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { // However, if this global is promoted into several functions we must ensure // we don't try and emit duplicate symbols! auto *ACPC = cast<ARMConstantPoolConstant>(ACPV); - auto *GV = ACPC->getPromotedGlobal(); - if (!EmittedPromotedGlobalLabels.count(GV)) { - MCSymbol *GVSym = getSymbol(GV); - OutStreamer->EmitLabel(GVSym); - EmittedPromotedGlobalLabels.insert(GV); + for (const auto *GV : ACPC->promotedGlobals()) { + if (!EmittedPromotedGlobalLabels.count(GV)) { + MCSymbol *GVSym = getSymbol(GV); + OutStreamer->EmitLabel(GVSym); + EmittedPromotedGlobalLabels.insert(GV); + } } return EmitGlobalConstant(DL, ACPC->getPromotedGlobalInit()); } @@ -949,8 +942,7 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { - MachineBasicBlock *MBB = JTBBs[i]; + for (MachineBasicBlock *MBB : JTBBs) { // Construct an MCExpr for the entry. We want a value of the form: // (BasicBlockAddr - TableBeginAddr) // @@ -993,8 +985,7 @@ void ARMAsmPrinter::EmitJumpTableInsts(const MachineInstr *MI) { const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; - for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { - MachineBasicBlock *MBB = JTBBs[i]; + for (MachineBasicBlock *MBB : JTBBs) { const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); // If this isn't a TBB or TBH, the entries are direct branch instructions. @@ -1209,6 +1200,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS); + const MachineFunction &MF = *MI->getParent()->getParent(); + const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>(); + unsigned FramePtr = STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11; + // If we just ended a constant pool, mark it as such. if (InConstantPool && MI->getOpcode() != ARM::CONSTPOOL_ENTRY) { OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); @@ -1293,9 +1288,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned TReg = MI->getOperand(0).getReg(); MCSymbol *TRegSym = nullptr; - for (unsigned i = 0, e = ThumbIndirectPads.size(); i < e; i++) { - if (ThumbIndirectPads[i].first == TReg) { - TRegSym = ThumbIndirectPads[i].second; + for (std::pair<unsigned, MCSymbol *> &TIP : ThumbIndirectPads) { + if (TIP.first == TReg) { + TRegSym = TIP.second; break; } } @@ -1543,7 +1538,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitJumpTableTBInst(MI, MI->getOpcode() == ARM::JUMPTABLE_TBB ? 1 : 2); return; case ARM::t2BR_JT: { - // Lower and emit the instruction itself, then the jump table following it. 
EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tMOVr) .addReg(ARM::PC) .addReg(MI->getOperand(0).getReg()) @@ -1652,7 +1646,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } case ARM::tBR_JTr: case ARM::BR_JTr: { - // Lower and emit the instruction itself, then the jump table following it. // mov pc, target MCInst TmpInst; unsigned Opc = MI->getOpcode() == ARM::BR_JTr ? @@ -1669,23 +1662,27 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, TmpInst); return; } - case ARM::BR_JTm: { - // Lower and emit the instruction itself, then the jump table following it. + case ARM::BR_JTm_i12: { // ldr pc, target MCInst TmpInst; - if (MI->getOperand(1).getReg() == 0) { - // literal offset - TmpInst.setOpcode(ARM::LDRi12); - TmpInst.addOperand(MCOperand::createReg(ARM::PC)); - TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); - TmpInst.addOperand(MCOperand::createImm(MI->getOperand(2).getImm())); - } else { - TmpInst.setOpcode(ARM::LDRrs); - TmpInst.addOperand(MCOperand::createReg(ARM::PC)); - TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); - TmpInst.addOperand(MCOperand::createReg(MI->getOperand(1).getReg())); - TmpInst.addOperand(MCOperand::createImm(0)); - } + TmpInst.setOpcode(ARM::LDRi12); + TmpInst.addOperand(MCOperand::createReg(ARM::PC)); + TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::createImm(MI->getOperand(2).getImm())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::createImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::createReg(0)); + EmitToStreamer(*OutStreamer, TmpInst); + return; + } + case ARM::BR_JTm_rs: { + // ldr pc, target + MCInst TmpInst; + TmpInst.setOpcode(ARM::LDRrs); + TmpInst.addOperand(MCOperand::createReg(ARM::PC)); + TmpInst.addOperand(MCOperand::createReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::createReg(MI->getOperand(1).getReg())); + TmpInst.addOperand(MCOperand::createImm(MI->getOperand(2).getImm())); // Add predicate operands. TmpInst.addOperand(MCOperand::createImm(ARMCC::AL)); TmpInst.addOperand(MCOperand::createReg(0)); @@ -1693,7 +1690,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } case ARM::BR_JTadd: { - // Lower and emit the instruction itself, then the jump table following it. // add pc, target, idx EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::ADDrr) .addReg(ARM::PC) @@ -1889,13 +1885,33 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) - .addReg(ARM::R7) - .addReg(SrcReg) - .addImm(0) - // Predicate. - .addImm(ARMCC::AL) - .addReg(0)); + if (STI.isTargetDarwin() || STI.isTargetWindows()) { + // These platforms always use the same frame register + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) + .addReg(FramePtr) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + } else { + // If the calling code might use either R7 or R11 as + // frame pointer register, restore it into both. + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) + .addReg(ARM::R7) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::LDRi12) + .addReg(ARM::R11) + .addReg(SrcReg) + .addImm(0) + // Predicate. 
+ .addImm(ARMCC::AL) + .addReg(0)); + } assert(Subtarget->hasV4TOps()); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::BX) @@ -1939,13 +1955,33 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addImm(ARMCC::AL) .addReg(0)); - EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) - .addReg(ARM::R7) - .addReg(SrcReg) - .addImm(0) - // Predicate. - .addImm(ARMCC::AL) - .addReg(0)); + if (STI.isTargetDarwin() || STI.isTargetWindows()) { + // These platforms always use the same frame register + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) + .addReg(FramePtr) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + } else { + // If the calling code might use either R7 or R11 as + // frame pointer register, restore it into both. + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) + .addReg(ARM::R7) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLDRi) + .addReg(ARM::R11) + .addReg(SrcReg) + .addImm(0) + // Predicate. + .addImm(ARMCC::AL) + .addReg(0)); + } EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tBX) .addReg(ScratchReg) diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 93fed10eb2d05..7b811b18f74ad 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -135,8 +135,7 @@ public: const Triple &TT = TM.getTargetTriple(); if (!TT.isOSBinFormatMachO()) return 0; - bool isThumb = TT.getArch() == Triple::thumb || - TT.getArch() == Triple::thumbeb || + bool isThumb = TT.isThumb() || TT.getSubArch() == Triple::ARMSubArch_v7m || TT.getSubArch() == Triple::ARMSubArch_v6m; return isThumb ? ARM::DW_ISA_ARM_thumb : ARM::DW_ISA_ARM_arm; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 3cf5950a1918d..8c1727724a9e3 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -37,6 +37,8 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -53,9 +55,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -1357,25 +1357,34 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const { MachineInstrBuilder LDM, STM; if (isThumb1 || !MI->getOperand(1).isDead()) { + MachineOperand LDWb(MI->getOperand(1)); + LDWb.setIsRenamable(false); LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD : isThumb1 ? ARM::tLDMIA_UPD : ARM::LDMIA_UPD)) - .add(MI->getOperand(1)); + .add(LDWb); } else { LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA)); } if (isThumb1 || !MI->getOperand(0).isDead()) { + MachineOperand STWb(MI->getOperand(0)); + STWb.setIsRenamable(false); STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD : isThumb1 ? ARM::tSTMIA_UPD : ARM::STMIA_UPD)) - .add(MI->getOperand(0)); + .add(STWb); } else { STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? 
ARM::t2STMIA : ARM::STMIA)); } - LDM.add(MI->getOperand(3)).add(predOps(ARMCC::AL)); - STM.add(MI->getOperand(2)).add(predOps(ARMCC::AL)); + MachineOperand LDBase(MI->getOperand(3)); + LDBase.setIsRenamable(false); + LDM.add(LDBase).add(predOps(ARMCC::AL)); + + MachineOperand STBase(MI->getOperand(2)); + STBase.setIsRenamable(false); + STM.add(STBase).add(predOps(ARMCC::AL)); // Sort the scratch registers into ascending order. const TargetRegisterInfo &TRI = getRegisterInfo(); @@ -1447,7 +1456,7 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { DEBUG(dbgs() << "widening: " << MI); MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI); - // Get rid of the old <imp-def> of DstRegD. Leave it if it defines a Q-reg + // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg // or some other super-register. int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD); if (ImpDefIdx != -1) @@ -1503,18 +1512,18 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { 4, ACPV->getModifier(), ACPV->mustAddCurrentAddress()); else if (ACPV->isExtSymbol()) NewCPV = ARMConstantPoolSymbol:: - Create(MF.getFunction()->getContext(), + Create(MF.getFunction().getContext(), cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4); else if (ACPV->isBlockAddress()) NewCPV = ARMConstantPoolConstant:: Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId, ARMCP::CPBlockAddress, 4); else if (ACPV->isLSDA()) - NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId, + NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId, ARMCP::CPLSDA, 4); else if (ACPV->isMachineBasicBlock()) NewCPV = ARMConstantPoolMBB:: - Create(MF.getFunction()->getContext(), + Create(MF.getFunction().getContext(), cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4); else llvm_unreachable("Unexpected ARM constantpool value type!!"); @@ -1550,20 +1559,29 @@ void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB, } } -MachineInstr *ARMBaseInstrInfo::duplicate(MachineInstr &Orig, - MachineFunction &MF) const { - MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF); - switch (Orig.getOpcode()) { - case ARM::tLDRpci_pic: - case ARM::t2LDRpci_pic: { - unsigned CPI = Orig.getOperand(1).getIndex(); - unsigned PCLabelId = duplicateCPV(MF, CPI); - Orig.getOperand(1).setIndex(CPI); - Orig.getOperand(2).setImm(PCLabelId); - break; - } +MachineInstr & +ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertBefore, + const MachineInstr &Orig) const { + MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig); + MachineBasicBlock::instr_iterator I = Cloned.getIterator(); + for (;;) { + switch (I->getOpcode()) { + case ARM::tLDRpci_pic: + case ARM::t2LDRpci_pic: { + MachineFunction &MF = *MBB.getParent(); + unsigned CPI = I->getOperand(1).getIndex(); + unsigned PCLabelId = duplicateCPV(MF, CPI); + I->getOperand(1).setIndex(CPI); + I->getOperand(2).setImm(PCLabelId); + break; + } + } + if (!I->isBundledWithSucc()) + break; + ++I; } - return MI; + return Cloned; } bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, @@ -1641,7 +1659,7 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0, } for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) { - // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg + // %12 = PICLDR %11, 0, pred:14, pred:%noreg const MachineOperand &MO0 = MI0.getOperand(i); const MachineOperand &MO1 = MI1.getOperand(i); if 
(!MO0.isIdenticalTo(MO1)) @@ -1825,7 +1843,7 @@ isProfitableToIfCvt(MachineBasicBlock &MBB, // If we are optimizing for size, see if the branch in the predecessor can be // lowered to cbn?z by the constant island lowering pass, and return false if // so. This results in a shorter instruction sequence. - if (MBB.getParent()->getFunction()->optForSize()) { + if (MBB.getParent()->getFunction().optForSize()) { MachineBasicBlock *Pred = *MBB.pred_begin(); if (!Pred->empty()) { MachineInstr *LastMI = &*Pred->rbegin(); @@ -2192,7 +2210,7 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, unsigned NumBytes) { // This optimisation potentially adds lots of load and store // micro-operations, it's only really a great benefit to code-size. - if (!MF.getFunction()->optForMinSize()) + if (!MF.getFunction().optForMinSize()) return false; // If only one register is pushed/popped, LLVM can use an LDR/STR @@ -2864,7 +2882,7 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) return false; if (!DefMI.getOperand(1).isImm()) - // Could be t2MOVi32imm <ga:xx> + // Could be t2MOVi32imm @xx return false; if (!MRI->hasOneNonDBGUse(Reg)) @@ -3964,7 +3982,7 @@ int ARMBaseInstrInfo::getOperandLatencyImpl( if (Latency > 0 && Subtarget.isThumb2()) { const MachineFunction *MF = DefMI.getParent()->getParent(); // FIXME: Use Function::optForSize(). - if (MF->getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) + if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize)) --Latency; } return Latency; @@ -4659,7 +4677,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32), DDst); - // On the first instruction, both DSrc and DDst may be <undef> if present. + // On the first instruction, both DSrc and DDst may be undef if present. // Specifically when the original instruction didn't have them as an // <imp-use>. unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst; @@ -4679,7 +4697,7 @@ void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI, MIB.addReg(DDst, RegState::Define); // On the second instruction, DDst has definitely been defined above, so - // it is not <undef>. DSrc, if present, can be <undef> as above. + // it is not undef. DSrc, if present, can be undef as above. CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst; CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI); MIB.addReg(CurReg, getUndefRegState(CurUndef)); @@ -4762,7 +4780,7 @@ unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance( // We must be able to clobber the whole D-reg. if (TargetRegisterInfo::isVirtualRegister(Reg)) { - // Virtual register must be a foo:ssub_0<def,undef> operand. + // Virtual register must be a def undef foo:ssub_0 operand. if (!MO.getSubReg() || MI.readsVirtualRegister(Reg)) return 0; } else if (ARM::SPRRegClass.contains(Reg)) { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index c52e572786d48..d375f40d6e147 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -21,7 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include <array> #include <cstdint> @@ -47,10 +47,10 @@ protected: /// and \p DefIdx. /// \p [out] InputRegs of the equivalent REG_SEQUENCE. 
Each element of /// the list is modeled as <Reg:SubReg, SubIdx>. - /// E.g., REG_SEQUENCE vreg1:sub1, sub0, vreg2, sub1 would produce + /// E.g., REG_SEQUENCE %1:sub1, sub0, %2, sub1 would produce /// two elements: - /// - vreg1:sub1, sub0 - /// - vreg2<:0>, sub1 + /// - %1:sub1, sub0 + /// - %2<:0>, sub1 /// /// \returns true if it is possible to build such an input sequence /// with the pair \p MI, \p DefIdx. False otherwise. @@ -63,8 +63,8 @@ protected: /// Build the equivalent inputs of a EXTRACT_SUBREG for the given \p MI /// and \p DefIdx. /// \p [out] InputReg of the equivalent EXTRACT_SUBREG. - /// E.g., EXTRACT_SUBREG vreg1:sub1, sub0, sub1 would produce: - /// - vreg1:sub1, sub0 + /// E.g., EXTRACT_SUBREG %1:sub1, sub0, sub1 would produce: + /// - %1:sub1, sub0 /// /// \returns true if it is possible to build such an input sequence /// with the pair \p MI, \p DefIdx. False otherwise. @@ -77,9 +77,9 @@ protected: /// and \p DefIdx. /// \p [out] BaseReg and \p [out] InsertedReg contain /// the equivalent inputs of INSERT_SUBREG. - /// E.g., INSERT_SUBREG vreg0:sub0, vreg1:sub1, sub3 would produce: - /// - BaseReg: vreg0:sub0 - /// - InsertedReg: vreg1:sub1, sub3 + /// E.g., INSERT_SUBREG %0:sub0, %1:sub1, sub3 would produce: + /// - BaseReg: %0:sub0 + /// - InsertedReg: %1:sub1, sub3 /// /// \returns true if it is possible to build such an input sequence /// with the pair \p MI, \p DefIdx. False otherwise. @@ -220,8 +220,9 @@ public: const MachineInstr &Orig, const TargetRegisterInfo &TRI) const override; - MachineInstr *duplicate(MachineInstr &Orig, - MachineFunction &MF) const override; + MachineInstr & + duplicate(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, + const MachineInstr &Orig) const override; const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, @@ -467,10 +468,10 @@ bool isCondBranchOpcode(int Opc) { return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc; } -static inline -bool isJumpTableBranchOpcode(int Opc) { - return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd || - Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT; +static inline bool isJumpTableBranchOpcode(int Opc) { + return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm_i12 || + Opc == ARM::BR_JTm_rs || Opc == ARM::BR_JTadd || Opc == ARM::tBR_JTr || + Opc == ARM::t2BR_JT; } static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 370c0a7f5c537..4b9a4376adf89 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -31,6 +31,8 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" @@ -41,10 +43,8 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> #include <utility> @@ -71,17 +71,17 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { ? CSR_iOS_SaveList : (UseSplitPush ? 
CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList); - const Function *F = MF->getFunction(); - if (F->getCallingConv() == CallingConv::GHC) { + const Function &F = MF->getFunction(); + if (F.getCallingConv() == CallingConv::GHC) { // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around return CSR_NoRegs_SaveList; - } else if (F->hasFnAttribute("interrupt")) { + } else if (F.hasFnAttribute("interrupt")) { if (STI.isMClass()) { // M-class CPUs have hardware which saves the registers needed to allow a // function conforming to the AAPCS to function as a handler. return UseSplitPush ? CSR_AAPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; - } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") { + } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") { // Fast interrupt mode gives the handler a private copy of R8-R14, so less // need to be saved to restore user-mode state. return CSR_FIQ_SaveList; @@ -92,11 +92,16 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } } - if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() && - F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) - return CSR_iOS_SwiftError_SaveList; + if (STI.getTargetLowering()->supportSwiftError() && + F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) { + if (STI.isTargetDarwin()) + return CSR_iOS_SwiftError_SaveList; - if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS) + return UseSplitPush ? CSR_AAPCS_SplitPush_SwiftError_SaveList : + CSR_AAPCS_SwiftError_SaveList; + } + + if (STI.isTargetDarwin() && F.getCallingConv() == CallingConv::CXX_FAST_TLS) return MF->getInfo<ARMFunctionInfo>()->isSplitCSR() ? CSR_iOS_CXX_TLS_PE_SaveList : CSR_iOS_CXX_TLS_SaveList; @@ -106,7 +111,7 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy( const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); - if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && + if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && MF->getInfo<ARMFunctionInfo>()->isSplitCSR()) return CSR_iOS_CXX_TLS_ViaCopy_SaveList; return nullptr; @@ -120,9 +125,10 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF, // This is academic because all GHC calls are (supposed to be) tail calls return CSR_NoRegs_RegMask; - if (STI.isTargetDarwin() && STI.getTargetLowering()->supportSwiftError() && - MF.getFunction()->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) - return CSR_iOS_SwiftError_RegMask; + if (STI.getTargetLowering()->supportSwiftError() && + MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) + return STI.isTargetDarwin() ? CSR_iOS_SwiftError_RegMask + : CSR_AAPCS_SwiftError_RegMask; if (STI.isTargetDarwin() && CC == CallingConv::CXX_FAST_TLS) return CSR_iOS_CXX_TLS_RegMask; @@ -274,7 +280,7 @@ static unsigned getPairedGPR(unsigned Reg, bool Odd, const MCRegisterInfo *RI) { } // Resolve the RegPairEven / RegPairOdd register allocator hints. 
-void +bool ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, @@ -294,7 +300,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, break; default: TargetRegisterInfo::getRegAllocationHints(VirtReg, Order, Hints, MF, VRM); - return; + return false; } // This register should preferably be even (Odd == 0) or odd (Odd == 1). @@ -302,7 +308,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, // the paired register as the first hint. unsigned Paired = Hint.second; if (Paired == 0) - return; + return false; unsigned PairedPhys = 0; if (TargetRegisterInfo::isPhysicalRegister(Paired)) { @@ -325,6 +331,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg, continue; Hints.push_back(Reg); } + return false; } void @@ -385,16 +392,12 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); const ARMFrameLowering *TFI = getFrameLowering(MF); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, - // 2. This is a Thumb1 function (it's not useful, so we don't bother), or - // 3. There are VLAs in the function and the base pointer is disabled. + // 2. There are VLAs in the function and the base pointer is disabled. if (!TargetRegisterInfo::canRealignStack(MF)) return false; - if (AFI->isThumb1OnlyFunction()) - return false; // Stack realignment requires a frame pointer. If we already started // register allocation with frame pointer elimination, it is too late now. if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>()))) @@ -437,7 +440,7 @@ void ARMBaseRegisterInfo::emitLoadConstPool( const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = - ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); + ConstantInt::get(Type::getInt32Ty(MF.getFunction().getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp)) @@ -801,7 +804,8 @@ bool ARMBaseRegisterInfo::shouldCoalesce(MachineInstr *MI, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const { + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const { auto MBB = MI->getParent(); auto MF = MBB->getParent(); const MachineRegisterInfo &MRI = MF->getRegInfo(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 2e91d9d4be246..5801e6bdbd0ea 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -17,9 +17,9 @@ #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cstdint> #define GET_REGINFO_HEADER @@ -27,6 +27,8 @@ namespace llvm { +class LiveIntervals; + /// Register allocation hints. 
namespace ARMRI { @@ -143,7 +145,7 @@ public: unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override; - void getRegAllocationHints(unsigned VirtReg, + bool getRegAllocationHints(unsigned VirtReg, ArrayRef<MCPhysReg> Order, SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, @@ -204,7 +206,8 @@ public: unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, - const TargetRegisterClass *NewRC) const override; + const TargetRegisterClass *NewRC, + LiveIntervals &LIS) const override; }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp index a1a31e1e7fae2..eab4b3b13f313 100644 --- a/lib/Target/ARM/ARMCallLowering.cpp +++ b/lib/Target/ARM/ARMCallLowering.cpp @@ -1,4 +1,4 @@ -//===-- llvm/lib/Target/ARM/ARMCallLowering.cpp - Call lowering -----------===// +//===- llvm/lib/Target/ARM/ARMCallLowering.cpp - Call lowering ------------===// // // The LLVM Compiler Infrastructure // @@ -6,30 +6,50 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// +// /// \file /// This file implements the lowering of LLVM calls to machine code calls for /// GlobalISel. -/// +// //===----------------------------------------------------------------------===// #include "ARMCallLowering.h" - #include "ARMBaseInstrInfo.h" #include "ARMISelLowering.h" #include "ARMSubtarget.h" - +#include "Utils/ARMBaseInfo.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelType.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/LowLevelTypeImpl.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <utility> using namespace llvm; -#ifndef LLVM_BUILD_GLOBAL_ISEL -#error "This shouldn't be built without GISel" -#endif - ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) : CallLowering(&TLI) {} @@ -63,12 +83,13 @@ static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, } namespace { + /// Helper class for values going out through an ABI boundary (used for handling /// function return values and call parameters). 
struct OutgoingValueHandler : public CallLowering::ValueHandler { OutgoingValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder &MIB, CCAssignFn *AssignFn) - : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB), StackSize(0) {} + : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} unsigned getStackAddress(uint64_t Size, int64_t Offset, MachinePointerInfo &MPO) override { @@ -157,9 +178,10 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { } MachineInstrBuilder &MIB; - uint64_t StackSize; + uint64_t StackSize = 0; }; -} // End anonymous namespace. + +} // end anonymous namespace void ARMCallLowering::splitToValueTypes( const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs, @@ -168,7 +190,7 @@ void ARMCallLowering::splitToValueTypes( LLVMContext &Ctx = OrigArg.Ty->getContext(); const DataLayout &DL = MF.getDataLayout(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); SmallVector<EVT, 4> SplitVTs; SmallVector<uint64_t, 4> Offsets; @@ -196,7 +218,7 @@ void ARMCallLowering::splitToValueTypes( bool NeedsConsecutiveRegisters = TLI.functionArgumentNeedsConsecutiveRegisters( - SplitTy, F->getCallingConv(), F->isVarArg()); + SplitTy, F.getCallingConv(), F.isVarArg()); if (NeedsConsecutiveRegisters) { Flags.setInConsecutiveRegs(); if (i == e - 1) @@ -222,7 +244,7 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, return true; auto &MF = MIRBuilder.getMF(); - const auto &F = *MF.getFunction(); + const auto &F = MF.getFunction(); auto DL = MF.getDataLayout(); auto &TLI = *getTLI<ARMTargetLowering>(); @@ -263,6 +285,7 @@ bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, } namespace { + /// Helper class for values coming in through an ABI boundary (used for handling /// formal arguments and call return values). struct IncomingValueHandler : public CallLowering::ValueHandler { @@ -320,13 +343,26 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { assert(VA.isRegLoc() && "Value shouldn't be assigned to reg"); assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?"); - assert(VA.getValVT().getSizeInBits() <= 64 && "Unsupported value size"); - assert(VA.getLocVT().getSizeInBits() <= 64 && "Unsupported location size"); + auto ValSize = VA.getValVT().getSizeInBits(); + auto LocSize = VA.getLocVT().getSizeInBits(); + + assert(ValSize <= 64 && "Unsupported value size"); + assert(LocSize <= 64 && "Unsupported location size"); - // The necessary extensions are handled on the other side of the ABI - // boundary. markPhysRegUsed(PhysReg); - MIRBuilder.buildCopy(ValVReg, PhysReg); + if (ValSize == LocSize) { + MIRBuilder.buildCopy(ValVReg, PhysReg); + } else { + assert(ValSize < LocSize && "Extensions not supported"); + + // We cannot create a truncating copy, nor a trunc of a physical register. + // Therefore, we need to copy the content of the physical register into a + // virtual one and then truncate that. 
+ auto PhysRegToVReg = + MRI.createGenericVirtualRegister(LLT::scalar(LocSize)); + MIRBuilder.buildCopy(PhysRegToVReg, PhysReg); + MIRBuilder.buildTrunc(ValVReg, PhysRegToVReg); + } } unsigned assignCustomValue(const ARMCallLowering::ArgInfo &Arg, @@ -375,11 +411,18 @@ struct FormalArgHandler : public IncomingValueHandler { MIRBuilder.getMBB().addLiveIn(PhysReg); } }; -} // End anonymous namespace + +} // end anonymous namespace bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F, ArrayRef<unsigned> VRegs) const { + auto &TLI = *getTLI<ARMTargetLowering>(); + auto Subtarget = TLI.getSubtarget(); + + if (Subtarget->isThumb()) + return false; + // Quick exit if there aren't any args if (F.arg_empty()) return true; @@ -390,16 +433,13 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, auto &MF = MIRBuilder.getMF(); auto &MBB = MIRBuilder.getMBB(); auto DL = MF.getDataLayout(); - auto &TLI = *getTLI<ARMTargetLowering>(); - auto Subtarget = TLI.getSubtarget(); - - if (Subtarget->isThumb()) - return false; - - for (auto &Arg : F.args()) + for (auto &Arg : F.args()) { if (!isSupportedType(DL, TLI, Arg.getType())) return false; + if (Arg.hasByValOrInAllocaAttr()) + return false; + } CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg()); @@ -433,6 +473,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, } namespace { + struct CallReturnHandler : public IncomingValueHandler { CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstrBuilder MIB, CCAssignFn *AssignFn) @@ -444,7 +485,8 @@ struct CallReturnHandler : public IncomingValueHandler { MachineInstrBuilder MIB; }; -} // End anonymous namespace. + +} // end anonymous namespace bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallingConv::ID CallConv, @@ -454,19 +496,26 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const auto &TLI = *getTLI<ARMTargetLowering>(); const auto &DL = MF.getDataLayout(); - const auto &STI = MF.getSubtarget(); + const auto &STI = MF.getSubtarget<ARMSubtarget>(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); MachineRegisterInfo &MRI = MF.getRegInfo(); - if (MF.getSubtarget<ARMSubtarget>().genLongCalls()) + if (STI.genLongCalls()) return false; auto CallSeqStart = MIRBuilder.buildInstr(ARM::ADJCALLSTACKDOWN); // Create the call instruction so we can add the implicit uses of arg // registers, but don't insert it yet. - auto MIB = MIRBuilder.buildInstrNoInsert(ARM::BLX).add(Callee).addRegMask( - TRI->getCallPreservedMask(MF, CallConv)); + bool isDirect = !Callee.isReg(); + auto CallOpcode = + isDirect ? ARM::BL + : STI.hasV5TOps() + ? ARM::BLX + : STI.hasV4TOps() ? 
ARM::BX_CALL : ARM::BMOVPCRX_CALL; + auto MIB = MIRBuilder.buildInstrNoInsert(CallOpcode) + .add(Callee) + .addRegMask(TRI->getCallPreservedMask(MF, CallConv)); if (Callee.isReg()) { auto CalleeReg = Callee.getReg(); if (CalleeReg && !TRI->isPhysicalRegister(CalleeReg)) @@ -483,6 +532,9 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!Arg.IsFixed) return false; + if (Arg.Flags.isByVal()) + return false; + SmallVector<unsigned, 8> Regs; splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { Regs.push_back(Reg); diff --git a/lib/Target/ARM/ARMCallLowering.h b/lib/Target/ARM/ARMCallLowering.h index f5a6872336f60..86854c53f1797 100644 --- a/lib/Target/ARM/ARMCallLowering.h +++ b/lib/Target/ARM/ARMCallLowering.h @@ -1,4 +1,4 @@ -//===-- llvm/lib/Target/ARM/ARMCallLowering.h - Call lowering -------------===// +//===- llvm/lib/Target/ARM/ARMCallLowering.h - Call lowering ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,23 +6,28 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -/// +// /// \file /// This file describes how to lower LLVM calls to machine code calls. -/// +// //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_ARM_ARMCALLLOWERING -#define LLVM_LIB_TARGET_ARM_ARMCALLLOWERING +#ifndef LLVM_LIB_TARGET_ARM_ARMCALLLOWERING_H +#define LLVM_LIB_TARGET_ARM_ARMCALLLOWERING_H -#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" -#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" +#include <cstdint> +#include <functional> namespace llvm { class ARMTargetLowering; +class MachineFunction; class MachineInstrBuilder; +class MachineIRBuilder; +class Value; class ARMCallLowering : public CallLowering { public: @@ -42,7 +47,7 @@ private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg, MachineInstrBuilder &Ret) const; - typedef std::function<void(unsigned Reg, uint64_t Offset)> SplitArgTy; + using SplitArgTy = std::function<void(unsigned Reg, uint64_t Offset)>; /// Split an argument into one or more arguments that the CC lowering can cope /// with (e.g. replace pointers with integers). @@ -51,5 +56,7 @@ private: MachineFunction &MF, const SplitArgTy &PerformArgSplit) const; }; -} // End of namespace llvm -#endif + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_ARM_ARMCALLLOWERING_H diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 71b8193624043..284b67fd59b6b 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -19,8 +19,8 @@ #include "ARMBaseInstrInfo.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/CallingConv.h" -#include "llvm/Target/TargetInstrInfo.h" namespace llvm { diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index bc7afdb7f1c9f..dcfd6518a840b 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -247,6 +247,9 @@ def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>; def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; +// R8 is used to pass swifterror, remove it from CSR. 
+def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>; + // The order of callee-saved registers needs to match the order we actually push // them in FrameLowering, because this order is what's used by // PrologEpilogInserter to allocate frame index slots. So when R7 is the frame @@ -255,6 +258,10 @@ def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, R11, R10, R9, R8, (sequence "D%u", 15, 8))>; +// R8 is used to pass swifterror, remove it from CSR. +def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, + R8)>; + // Constructors and destructors return 'this' in the ARM C++ ABI; since 'this' // and the pointer return value are both passed in R0 in these cases, this can // be partially modelled by treating R0 as a callee-saved register diff --git a/lib/Target/ARM/ARMComputeBlockSize.cpp b/lib/Target/ARM/ARMComputeBlockSize.cpp index e145d0a49ae64..2e97b99b05a7d 100644 --- a/lib/Target/ARM/ARMComputeBlockSize.cpp +++ b/lib/Target/ARM/ARMComputeBlockSize.cpp @@ -14,7 +14,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include <vector> using namespace llvm; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 667337dc9267f..8baee1ce281d7 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1,4 +1,4 @@ -//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===// +//===- ARMConstantIslandPass.cpp - ARM constant islands -------------------===// // // The LLVM Compiler Infrastructure // @@ -20,6 +20,7 @@ #include "ARMSubtarget.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb2InstrInfo.h" +#include "Utils/ARMBaseInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -37,6 +38,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -48,7 +50,6 @@ #include <cassert> #include <cstdint> #include <iterator> -#include <new> #include <utility> #include <vector> @@ -107,7 +108,7 @@ namespace { /// previous iteration by inserting unconditional branches. SmallSet<MachineBasicBlock*, 4> NewWaterList; - typedef std::vector<MachineBasicBlock*>::iterator water_iterator; + using water_iterator = std::vector<MachineBasicBlock *>::iterator; /// CPUser - One user of a constant pool, keeping the machine instruction /// pointer, the constant pool being referenced, and the max displacement @@ -128,12 +129,11 @@ namespace { unsigned MaxDisp; bool NegOk; bool IsSoImm; - bool KnownAlignment; + bool KnownAlignment = false; CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, bool neg, bool soimm) - : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm), - KnownAlignment(false) { + : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) { HighWaterMark = CPEMI->getParent(); } @@ -195,11 +195,9 @@ namespace { }; /// ImmBranches - Keep track of all the immediate branch instructions. - /// std::vector<ImmBranch> ImmBranches; /// PushPopMIs - Keep track of all the Thumb push / pop instructions. - /// SmallVector<MachineInstr*, 4> PushPopMIs; /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions. 
@@ -290,10 +288,10 @@ namespace { } }; - char ARMConstantIslands::ID = 0; - } // end anonymous namespace +char ARMConstantIslands::ID = 0; + /// verify - check BBOffsets, BBSizes, alignment of islands void ARMConstantIslands::verify() { #ifndef NDEBUG @@ -328,7 +326,7 @@ LLVM_DUMP_METHOD void ARMConstantIslands::dumpBBs() { DEBUG({ for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) { const BasicBlockInfo &BBI = BBInfo[J]; - dbgs() << format("%08x BB#%u\t", BBI.Offset, J) + dbgs() << format("%08x %bb.%u\t", BBI.Offset, J) << " kb=" << unsigned(BBI.KnownBits) << " ua=" << unsigned(BBI.Unalign) << " pa=" << unsigned(BBI.PostAlign) @@ -566,7 +564,8 @@ void ARMConstantIslands::doInitialJumpTablePlacement( case ARM::BR_JTadd: case ARM::BR_JTr: case ARM::tBR_JTr: - case ARM::BR_JTm: + case ARM::BR_JTm_i12: + case ARM::BR_JTm_rs: JTOpcode = ARM::JUMPTABLE_ADDRS; break; case ARM::t2BR_JT: @@ -629,9 +628,9 @@ bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) { /// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI, /// look up the corresponding CPEntry. -ARMConstantIslands::CPEntry -*ARMConstantIslands::findConstPoolEntry(unsigned CPI, - const MachineInstr *CPEMI) { +ARMConstantIslands::CPEntry * +ARMConstantIslands::findConstPoolEntry(unsigned CPI, + const MachineInstr *CPEMI) { std::vector<CPEntry> &CPEs = CPEntries[CPI]; // Number of entries per constpool index should be small, just do a // linear search. @@ -1072,11 +1071,11 @@ bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset, const BasicBlockInfo &BBI = BBInfo[Block]; dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm() << " max delta=" << MaxDisp - << format(" insn address=%#x", UserOffset) - << " in BB#" << Block << ": " + << format(" insn address=%#x", UserOffset) << " in " + << printMBBReference(*MI->getParent()) << ": " << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI << format("CPE address=%#x offset=%+d: ", CPEOffset, - int(CPEOffset-UserOffset)); + int(CPEOffset - UserOffset)); }); } @@ -1126,7 +1125,6 @@ void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) { /// and instruction CPEMI, and decrement its refcount. If the refcount /// becomes 0 remove the entry and instruction. Returns true if we removed /// the entry, false if we didn't. - bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI, MachineInstr *CPEMI) { // Find the old entry. Eliminate it if it is no longer used. @@ -1154,8 +1152,7 @@ unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) { /// 0 = no existing entry found /// 1 = entry found, and there were no code insertions or deletions /// 2 = entry found, and there were code insertions or deletions -int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) -{ +int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset) { MachineInstr *UserMI = U.MI; MachineInstr *CPEMI = U.CPEMI; @@ -1264,7 +1261,7 @@ bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset, // This is the least amount of required padding seen so far. 
BestGrowth = Growth; WaterIter = IP; - DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber() + DEBUG(dbgs() << "Found water after " << printMBBReference(*WaterBB) << " Growth=" << Growth << '\n'); if (CloserWater && WaterBB == U.MI->getParent()) @@ -1308,8 +1305,8 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex, unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta; if (isOffsetInRange(UserOffset, CPEOffset, U)) { - DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber() - << format(", expected CPE offset %#x\n", CPEOffset)); + DEBUG(dbgs() << "Split at end of " << printMBBReference(*UserMBB) + << format(", expected CPE offset %#x\n", CPEOffset)); NewMBB = &*++UserMBB->getIterator(); // Add an unconditional branch from UserMBB to fallthrough block. Record // it for branch lengthening; this new branch will not get out of range, @@ -1581,11 +1578,11 @@ bool ARMConstantIslands::isBBInRange(MachineInstr *MI,MachineBasicBlock *DestBB, unsigned BrOffset = getOffsetOf(MI) + PCAdj; unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset; - DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber() - << " from BB#" << MI->getParent()->getNumber() - << " max delta=" << MaxDisp - << " from " << getOffsetOf(MI) << " to " << DestOffset - << " offset " << int(DestOffset-BrOffset) << "\t" << *MI); + DEBUG(dbgs() << "Branch of destination " << printMBBReference(*DestBB) + << " from " << printMBBReference(*MI->getParent()) + << " max delta=" << MaxDisp << " from " << getOffsetOf(MI) + << " to " << DestOffset << " offset " + << int(DestOffset - BrOffset) << "\t" << *MI); if (BrOffset <= DestOffset) { // Branch before the Dest. @@ -1693,13 +1690,19 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) { int delta = TII->getInstSizeInBytes(MBB->back()); BBInfo[MBB->getNumber()].Size -= delta; MBB->back().eraseFromParent(); + + // The conditional successor will be swapped between the BBs after this, so + // update CFG. + MBB->addSuccessor(DestBB); + std::next(MBB->getIterator())->removeSuccessor(DestBB); + // BBInfo[SplitBB].Offset is wrong temporarily, fixed below } MachineBasicBlock *NextBB = &*++MBB->getIterator(); - DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber() - << " also invert condition and change dest. to BB#" - << NextBB->getNumber() << "\n"); + DEBUG(dbgs() << " Insert B to " << printMBBReference(*DestBB) + << " also invert condition and change dest. to " + << printMBBReference(*NextBB) << "\n"); // Insert a new conditional branch and a new unconditional branch. // Also update the ImmBranch as well as adding a new entry for the new branch. @@ -2128,7 +2131,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { // We're in thumb-1 mode, so we must have something like: // %idx = tLSLri %idx, 2 // %base = tLEApcrelJT - // %t = tLDRr %idx, %base + // %t = tLDRr %base, %idx unsigned BaseReg = User.MI->getOperand(0).getReg(); if (User.MI->getIterator() == User.MI->getParent()->begin()) @@ -2150,9 +2153,9 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { MachineInstr *Load = User.MI->getNextNode(); if (Load->getOpcode() != ARM::tLDRr) continue; - if (Load->getOperand(1).getReg() != ShiftedIdxReg || - Load->getOperand(2).getReg() != BaseReg || - !Load->getOperand(1).isKill()) + if (Load->getOperand(1).getReg() != BaseReg || + Load->getOperand(2).getReg() != ShiftedIdxReg || + !Load->getOperand(2).isKill()) continue; // If we're in PIC mode, there should be another ADD following. 
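The debug-output hunks above drop the hand-rolled "BB#" << getNumber() strings in favour of printMBBReference, which emits the newer "%bb.N" spelling. A small sketch of a DEBUG-style line written against that helper; describeWater is a hypothetical function, and the includes assume the in-tree CodeGen headers referenced in this diff.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: printMBBReference returns a Printable that streams the
// canonical "%bb.N" block reference used by the updated DEBUG lines above.
static void describeWater(const llvm::MachineBasicBlock &MBB,
                          llvm::raw_ostream &OS) {
  OS << "Found water after " << printMBBReference(MBB) << '\n';
}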
@@ -2169,9 +2172,9 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { if (isPositionIndependentOrROPI) { MachineInstr *Add = Load->getNextNode(); if (Add->getOpcode() != ARM::tADDrr || - Add->getOperand(2).getReg() != Load->getOperand(0).getReg() || - Add->getOperand(3).getReg() != BaseReg || - !Add->getOperand(2).isKill()) + Add->getOperand(2).getReg() != BaseReg || + Add->getOperand(3).getReg() != Load->getOperand(0).getReg() || + !Add->getOperand(3).isKill()) continue; if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg()) continue; @@ -2209,7 +2212,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() { .addReg(IdxReg, getKillRegState(IdxRegKill)) .addJumpTableIndex(JTI, JTOP.getTargetFlags()) .addImm(CPEMI->getOperand(0).getImm()); - DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI); + DEBUG(dbgs() << printMBBReference(*MBB) << ": " << *NewJTMI); unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH; CPEMI->setDesc(TII->get(JTOpc)); diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 9705c8b718b70..39ae02af513b6 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -1,4 +1,4 @@ -//===-- ARMConstantPoolValue.cpp - ARM constantpool value -----------------===// +//===- ARMConstantPoolValue.cpp - ARM constantpool value ------------------===// // // The LLVM Compiler Infrastructure // @@ -13,7 +13,6 @@ #include "ARMConstantPoolValue.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -140,8 +139,9 @@ ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C, ARMConstantPoolConstant::ARMConstantPoolConstant(const GlobalVariable *GV, const Constant *C) : ARMConstantPoolValue((Type *)C->getType(), 0, ARMCP::CPPromotedGlobal, 0, - ARMCP::no_modifier, false), - CVal(C), GVar(GV) {} + ARMCP::no_modifier, false), CVal(C) { + GVars.insert(GV); +} ARMConstantPoolConstant * ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) { @@ -189,7 +189,15 @@ const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const { int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - return getExistingMachineCPValueImpl<ARMConstantPoolConstant>(CP, Alignment); + int index = + getExistingMachineCPValueImpl<ARMConstantPoolConstant>(CP, Alignment); + if (index != -1) { + auto *CPV = static_cast<ARMConstantPoolValue*>( + CP->getConstants()[index].Val.MachineCPVal); + auto *Constant = cast<ARMConstantPoolConstant>(CPV); + Constant->GVars.insert(GVars.begin(), GVars.end()); + } + return index; } bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) { @@ -199,6 +207,8 @@ bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) { void ARMConstantPoolConstant::addSelectionDAGCSEId(FoldingSetNodeID &ID) { ID.AddPointer(CVal); + for (const auto *GV : GVars) + ID.AddPointer(GV); ARMConstantPoolValue::addSelectionDAGCSEId(ID); } @@ -282,6 +292,6 @@ void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) { } void ARMConstantPoolMBB::print(raw_ostream &O) const { - O << "BB#" << MBB->getNumber(); + O << printMBBReference(*MBB); ARMConstantPoolValue::print(O); } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 61c521581f795..5139a18f92635 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ 
b/lib/Target/ARM/ARMConstantPoolValue.h @@ -1,4 +1,4 @@ -//===-- ARMConstantPoolValue.h - ARM constantpool value ---------*- C++ -*-===// +//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,7 +14,9 @@ #ifndef LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H #define LLVM_LIB_TARGET_ARM_ARMCONSTANTPOOLVALUE_H +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/Support/Casting.h" #include <string> @@ -28,6 +30,8 @@ class GlobalValue; class GlobalVariable; class LLVMContext; class MachineBasicBlock; +class raw_ostream; +class Type; namespace ARMCP { @@ -80,8 +84,8 @@ protected: for (unsigned i = 0, e = Constants.size(); i != e; ++i) { if (Constants[i].isMachineConstantPoolEntry() && (Constants[i].getAlignment() & AlignMask) == 0) { - ARMConstantPoolValue *CPV = - (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + auto *CPV = + static_cast<ARMConstantPoolValue*>(Constants[i].Val.MachineCPVal); if (Derived *APC = dyn_cast<Derived>(CPV)) if (cast<Derived>(this)->equals(APC)) return i; @@ -139,7 +143,7 @@ inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { /// Functions, and BlockAddresses. class ARMConstantPoolConstant : public ARMConstantPoolValue { const Constant *CVal; // Constant being loaded. - const GlobalVariable *GVar = nullptr; + SmallPtrSet<const GlobalVariable*, 1> GVars; ARMConstantPoolConstant(const Constant *C, unsigned ID, @@ -173,8 +177,10 @@ public: const GlobalValue *getGV() const; const BlockAddress *getBlockAddress() const; - const GlobalVariable *getPromotedGlobal() const { - return dyn_cast_or_null<GlobalVariable>(GVar); + using promoted_iterator = SmallPtrSet<const GlobalVariable *, 1>::iterator; + + iterator_range<promoted_iterator> promotedGlobals() { + return iterator_range<promoted_iterator>(GVars.begin(), GVars.end()); } const Constant *getPromotedGlobalInit() const { diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 376727729d893..b14b2c6a813fa 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -24,13 +24,6 @@ #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineInstrBundle.h" -#include "llvm/IR/GlobalValue.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove! -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -40,6 +33,8 @@ static cl::opt<bool> VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, cl::desc("Verify machine code after expanding ARM pseudos")); +#define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass" + namespace { class ARMExpandPseudo : public MachineFunctionPass { public: @@ -59,7 +54,7 @@ namespace { } StringRef getPassName() const override { - return "ARM pseudo instruction expansion pass"; + return ARM_EXPAND_PSEUDO_NAME; } private: @@ -88,6 +83,9 @@ namespace { char ARMExpandPseudo::ID = 0; } +INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false, + false) + /// TransferImpOps - Transfer implicit operands on the pseudo instruction to /// the instructions created from the expansion. 
void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, @@ -608,8 +606,11 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, // Transfer the destination register operand. MIB.add(MI.getOperand(OpIdx++)); - if (IsExt) - MIB.add(MI.getOperand(OpIdx++)); + if (IsExt) { + MachineOperand VdSrc(MI.getOperand(OpIdx++)); + VdSrc.setIsRenamable(false); + MIB.add(VdSrc); + } bool SrcIsKill = MI.getOperand(OpIdx).isKill(); unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); @@ -618,7 +619,9 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MIB.addReg(D0); // Copy the other source register operand. - MIB.add(MI.getOperand(OpIdx++)); + MachineOperand VmSrc(MI.getOperand(OpIdx++)); + VmSrc.setIsRenamable(false); + MIB.add(VmSrc); // Copy the predicate operands. MIB.add(MI.getOperand(OpIdx++)); @@ -666,6 +669,12 @@ static bool IsAnAddressOperand(const MachineOperand &MO) { llvm_unreachable("unhandled machine operand type"); } +static MachineOperand makeImplicit(const MachineOperand &MO) { + MachineOperand NewMO = MO; + NewMO.setImplicit(); + return NewMO; +} + void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr &MI = *MBBI; @@ -700,6 +709,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg).add(condCodeOp()); HI16.addImm(Pred).addReg(PredReg).add(condCodeOp()); + if (isCC) + LO16.add(makeImplicit(MI.getOperand(1))); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); return; @@ -753,6 +764,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, if (RequiresBundling) finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator()); + if (isCC) + LO16.add(makeImplicit(MI.getOperand(1))); TransferImpOps(MI, LO16, HI16); MI.eraseFromParent(); } @@ -852,16 +865,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MI.eraseFromParent(); // Recompute livein lists. - const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); LivePhysRegs LiveRegs; - computeLiveIns(LiveRegs, MRI, *DoneBB); - computeLiveIns(LiveRegs, MRI, *StoreBB); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *DoneBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); // Do an extra pass around the loop to get loop carried registers right. StoreBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *StoreBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); LoadCmpBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); return true; } @@ -915,7 +927,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, // .Lloadcmp: // ldrexd rDestLo, rDestHi, [rAddr] // cmp rDestLo, rDesiredLo - // sbcs rTempReg<dead>, rDestHi, rDesiredHi + // sbcs dead rTempReg, rDestHi, rDesiredHi // bne .Ldone unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD; MachineInstrBuilder MIB; @@ -972,16 +984,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, MI.eraseFromParent(); // Recompute livein lists. 
- const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); LivePhysRegs LiveRegs; - computeLiveIns(LiveRegs, MRI, *DoneBB); - computeLiveIns(LiveRegs, MRI, *StoreBB); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *DoneBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); // Do an extra pass around the loop to get loop carried registers right. StoreBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *StoreBB); + computeAndAddLiveIns(LiveRegs, *StoreBB); LoadCmpBB->clearLiveIns(); - computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + computeAndAddLiveIns(LiveRegs, *LoadCmpBB); return true; } @@ -1054,7 +1065,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.getOperand(1).getReg()) .add(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) // 'pred' - .add(MI.getOperand(4)); + .add(MI.getOperand(4)) + .add(makeImplicit(MI.getOperand(1))); MI.eraseFromParent(); return true; @@ -1067,7 +1079,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .add(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) // 'pred' .add(MI.getOperand(4)) - .add(condCodeOp()); // 's' bit + .add(condCodeOp()) // 's' bit + .add(makeImplicit(MI.getOperand(1))); MI.eraseFromParent(); return true; @@ -1079,7 +1092,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addImm(MI.getOperand(3).getImm()) .addImm(MI.getOperand(4).getImm()) // 'pred' .add(MI.getOperand(5)) - .add(condCodeOp()); // 's' bit + .add(condCodeOp()) // 's' bit + .add(makeImplicit(MI.getOperand(1))); MI.eraseFromParent(); return true; @@ -1092,7 +1106,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addImm(MI.getOperand(4).getImm()) .addImm(MI.getOperand(5).getImm()) // 'pred' .add(MI.getOperand(6)) - .add(condCodeOp()); // 's' bit + .add(condCodeOp()) // 's' bit + .add(makeImplicit(MI.getOperand(1))); MI.eraseFromParent(); return true; @@ -1104,7 +1119,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' - .add(MI.getOperand(4)); + .add(MI.getOperand(4)) + .add(makeImplicit(MI.getOperand(1))); MI.eraseFromParent(); return true; } @@ -1116,7 +1132,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' .add(MI.getOperand(4)) - .add(condCodeOp()); // 's' bit + .add(condCodeOp()) // 's' bit + .add(makeImplicit(MI.getOperand(1))); MI.eraseFromParent(); return true; @@ -1129,7 +1146,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' .add(MI.getOperand(4)) - .add(condCodeOp()); // 's' bit + .add(condCodeOp()) // 's' bit + .add(makeImplicit(MI.getOperand(1))); MI.eraseFromParent(); return true; @@ -1152,7 +1170,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, .addImm(MI.getOperand(3).getImm()) .addImm(MI.getOperand(4).getImm()) // 'pred' .add(MI.getOperand(5)) - .add(condCodeOp()); // 's' bit + .add(condCodeOp()) // 's' bit + .add(makeImplicit(MI.getOperand(1))); MI.eraseFromParent(); return true; } @@ -1240,7 +1259,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MachineConstantPool *MCP = MF->getConstantPool(); unsigned PCLabelID = AFI->createPICLabelUId(); MachineConstantPoolValue *CPV = - ARMConstantPoolSymbol::Create(MF->getFunction()->getContext(), + ARMConstantPoolSymbol::Create(MF->getFunction().getContext(), "__aeabi_read_tp", PCLabelID, 
0); unsigned Reg = MI.getOperand(0).getReg(); MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), @@ -1297,6 +1316,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned DstReg = MI.getOperand(0).getReg(); bool DstIsDead = MI.getOperand(0).isDead(); const MachineOperand &MO1 = MI.getOperand(1); + auto Flags = MO1.getTargetFlags(); const GlobalValue *GV = MO1.getGlobal(); bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs; @@ -1315,9 +1335,13 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (IsPIC) { unsigned PCAdj = IsARM ? 8 : 4; + auto Modifier = (Flags & ARMII::MO_GOT) + ? ARMCP::GOT_PREL + : ARMCP::no_modifier; ARMPCLabelIndex = AFI->createPICLabelUId(); - CPV = ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, - ARMCP::CPValue, PCAdj); + CPV = ARMConstantPoolConstant::Create( + GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier, + /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL); } else CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier); @@ -1445,7 +1469,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); // Copy the destination register. - MIB.add(MI.getOperand(OpIdx++)); + MachineOperand Dst(MI.getOperand(OpIdx++)); + Dst.setIsRenamable(false); + MIB.add(Dst); // Copy the predicate operands. MIB.add(MI.getOperand(OpIdx++)); @@ -1698,9 +1724,8 @@ bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { AFI = MF.getInfo<ARMFunctionInfo>(); bool Modified = false; - for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; - ++MFI) - Modified |= ExpandMBB(*MFI); + for (MachineBasicBlock &MBB : MF) + Modified |= ExpandMBB(MBB); if (VerifyARMPseudo) MF.verify(this, "After expanding ARM pseudo instructions."); return Modified; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 5dc93734ab593..0ea435062ec03 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1,4 +1,4 @@ -//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===// +//===- ARMFastISel.cpp - ARM FastISel implementation ----------------------===// // // The LLVM Compiler Infrastructure // @@ -23,17 +23,19 @@ #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" +#include "Utils/ARMBaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" @@ -41,6 +43,10 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -58,6 +64,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include 
"llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/Type.h" @@ -69,8 +76,6 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <cassert> @@ -82,7 +87,7 @@ using namespace llvm; namespace { // All possible address modes, plus some. - typedef struct Address { + struct Address { enum { RegBase, FrameIndexBase @@ -99,7 +104,7 @@ namespace { Address() { Base.Reg = 0; } - } Address; + }; class ARMFastISel final : public FastISel { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can @@ -1848,7 +1853,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) { switch (CC) { default: - llvm_unreachable("Unsupported calling convention"); + report_fatal_error("Unsupported calling convention"); case CallingConv::Fast: if (Subtarget->hasVFP2() && !isVarArg) { if (!Subtarget->isAAPCS_ABI()) @@ -1882,7 +1887,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); case CallingConv::GHC: if (Return) - llvm_unreachable("Can't return in GHC call convention"); + report_fatal_error("Can't return in GHC call convention"); else return CC_ARM_APCS_GHC; } @@ -2890,13 +2895,11 @@ bool ARMFastISel::fastSelectInstruction(const Instruction *I) { return false; } -namespace { - // This table describes sign- and zero-extend instructions which can be // folded into a preceding load. All of these extends have an immediate // (sometimes a mask and sometimes a shift) that's applied after // extension. -const struct FoldableLoadExtendsStruct { +static const struct FoldableLoadExtendsStruct { uint16_t Opc[2]; // ARM, Thumb. uint8_t ExpectedImm; uint8_t isZExt : 1; @@ -2909,8 +2912,6 @@ const struct FoldableLoadExtendsStruct { { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 } }; -} // end anonymous namespace - /// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if @@ -2957,7 +2958,7 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT) { bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); - LLVMContext *Context = &MF->getFunction()->getContext(); + LLVMContext *Context = &MF->getFunction().getContext(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 00b788a1b530b..4ff864ac6ccdf 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1,4 +1,4 @@ -//===-- ARMFrameLowering.cpp - ARM Frame Information ----------------------===// +//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===// // // The LLVM Compiler Infrastructure // @@ -19,6 +19,7 @@ #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" +#include "Utils/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -33,12 +34,17 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" @@ -47,11 +53,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstddef> @@ -200,10 +203,10 @@ static int sizeOfSPAdjustment(const MachineInstr &MI) { static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes) { const MachineFrameInfo &MFI = MF.getFrameInfo(); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096; - if (F->hasFnAttribute("stack-probe-size")) - F->getFnAttribute("stack-probe-size") + if (F.hasFnAttribute("stack-probe-size")) + F.getFnAttribute("stack-probe-size") .getValueAsString() .getAsInteger(0, StackProbeSize); return StackSizeInBytes >= StackProbeSize; @@ -367,7 +370,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. 
- if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; StackAdjustingInsts DefCFAOffsetCandidates; @@ -445,7 +448,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, int FramePtrOffsetInPush = 0; if (HasFP) { int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); - assert(getMaxFPOffset(*MF.getFunction(), *AFI) <= FPOffset && + assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset && "Max FP estimation is wrong"); FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + @@ -512,7 +515,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, switch (TM.getCodeModel()) { case CodeModel::Small: case CodeModel::Medium: - case CodeModel::Default: case CodeModel::Kernel: BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL)) .add(predOps(ARMCC::AL)) @@ -521,7 +523,6 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlags(MachineInstr::FrameSetup); break; case CodeModel::Large: - case CodeModel::JITDefault: BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12) .addExternalSymbol("__chkstk") .setMIFlags(MachineInstr::FrameSetup); @@ -765,7 +766,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. - if (MF.getFunction()->getCallingConv() == CallingConv::GHC) + if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; // First put ourselves on the first (from top) terminator instructions. @@ -955,7 +956,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, DebugLoc DL; - typedef std::pair<unsigned, bool> RegAndKill; + using RegAndKill = std::pair<unsigned, bool>; + SmallVector<RegAndKill, 4> Regs; unsigned i = CSI.size(); while (i != 0) { @@ -1021,7 +1023,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, + std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, bool(*Func)(unsigned, bool), @@ -1051,7 +1053,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, unsigned LastReg = 0; bool DeleteRet = false; for (; i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); + CalleeSavedInfo &Info = CSI[i-1]; + unsigned Reg = Info.getReg(); if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue; // The aligned reloads from area DPRCS2 are not inserted here. @@ -1064,6 +1067,9 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, Reg = ARM::PC; DeleteRet = true; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; + // We 'restore' LR into PC so it is not live out of the return block: + // Clear Restored bit. + Info.setRestored(false); } else LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; // Fold the return instruction into the LDM. 
@@ -1092,9 +1098,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, .add(predOps(ARMCC::AL)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i], getDefRegState(true)); - if (DeleteRet && MI != MBB.end()) { - MIB.copyImplicitOps(*MI); - MI->eraseFromParent(); + if (DeleteRet) { + if (MI != MBB.end()) { + MIB.copyImplicitOps(*MI); + MI->eraseFromParent(); + } } MI = MIB; } else if (Regs.size() == 1) { @@ -1283,9 +1291,11 @@ skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, case 7: ++MI; assert(MI->mayStore() && "Expecting spill instruction"); + LLVM_FALLTHROUGH; default: ++MI; assert(MI->mayStore() && "Expecting spill instruction"); + LLVM_FALLTHROUGH; case 1: case 2: case 4: @@ -1421,7 +1431,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, + std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -1516,7 +1526,6 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF, // In functions that realign the stack, it can be an advantage to spill the // callee-saved vector registers after realigning the stack. The vst1 and vld1 // instructions take alignment hints that can improve performance. -// static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0); @@ -1524,7 +1533,7 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) { return; // Naked functions don't spill callee-saved registers. - if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + if (MF.getFunction().hasFnAttribute(Attribute::Naked)) return; // We are planning to use NEON instructions vst1 / vld1. @@ -1601,14 +1610,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (AFI->getArgRegsSaveSize() > 0) SavedRegs.set(ARM::LR); - // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know - // for sure what the stack size will be, but for this, an estimate is good - // enough. If there anything changes it, it'll be a spill, which implies - // we've used all the registers and so R4 is already used, so not marking - // it here will be OK. + // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function + // requires stack alignment. We don't know for sure what the stack size + // will be, but for this, an estimate is good enough. If there anything + // changes it, it'll be a spill, which implies we've used all the registers + // and so R4 is already used, so not marking it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = MFI.estimateStackSize(MF); - if (MFI.hasVarSizedObjects() || StackSize > 508) + if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) || + MFI.estimateStackSize(MF) > 508) SavedRegs.set(ARM::R4); } @@ -1735,13 +1744,12 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, EstimatedStackSize += 16; // For possible paddings. 
unsigned EstimatedRSStackSizeLimit = estimateRSStackSizeLimit(MF, this); - int MaxFPOffset = getMaxFPOffset(*MF.getFunction(), *AFI); + int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI); bool BigFrameOffsets = EstimatedStackSize >= EstimatedRSStackSizeLimit || MFI.hasVarSizedObjects() || (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF)) || // For large argument stacks fp relative addressed may overflow. (HasFP && (MaxFixedOffset - MaxFPOffset) >= (int)EstimatedRSStackSizeLimit); - bool ExtraCSSpill = false; if (BigFrameOffsets || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { AFI->setHasStackFrame(true); @@ -1766,6 +1774,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, CS1Spilled = true; } + // This is true when we inserted a spill for an unused register that can now + // be used for register scavenging. + bool ExtraCSSpill = false; + if (AFI->isThumb1OnlyFunction()) { // For Thumb1-only targets, we need some low registers when we save and // restore the high registers (which aren't allocatable, but could be @@ -1785,7 +1797,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) { if (!MF.getRegInfo().isLiveIn(Reg)) { --EntryRegDeficit; - DEBUG(dbgs() << PrintReg(Reg, TRI) + DEBUG(dbgs() << printReg(Reg, TRI) << " is unused argument register, EntryRegDeficit = " << EntryRegDeficit << "\n"); } @@ -1805,13 +1817,13 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) { if (SavedRegs.test(Reg)) { --RegDeficit; - DEBUG(dbgs() << PrintReg(Reg, TRI) + DEBUG(dbgs() << printReg(Reg, TRI) << " is saved low register, RegDeficit = " << RegDeficit << "\n"); } else { AvailableRegs.push_back(Reg); DEBUG(dbgs() - << PrintReg(Reg, TRI) + << printReg(Reg, TRI) << " is non-saved low register, adding to AvailableRegs\n"); } } @@ -1820,12 +1832,12 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (!HasFP) { if (SavedRegs.test(ARM::R7)) { --RegDeficit; - DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = " + DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = " << RegDeficit << "\n"); } else { AvailableRegs.push_back(ARM::R7); DEBUG(dbgs() - << "%R7 is non-saved low register, adding to AvailableRegs\n"); + << "%r7 is non-saved low register, adding to AvailableRegs\n"); } } @@ -1833,7 +1845,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) { if (SavedRegs.test(Reg)) { ++RegDeficit; - DEBUG(dbgs() << PrintReg(Reg, TRI) + DEBUG(dbgs() << printReg(Reg, TRI) << " is saved high register, RegDeficit = " << RegDeficit << "\n"); } @@ -1847,11 +1859,11 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, MF.getFrameInfo().isReturnAddressTaken())) { if (SavedRegs.test(ARM::LR)) { --RegDeficit; - DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit + DEBUG(dbgs() << "%lr is saved register, RegDeficit = " << RegDeficit << "\n"); } else { AvailableRegs.push_back(ARM::LR); - DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n"); + DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n"); } } @@ -1863,12 +1875,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n"); for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) { unsigned Reg = AvailableRegs.pop_back_val(); - DEBUG(dbgs() << "Spilling " << 
PrintReg(Reg, TRI) + DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI) << " to make up reg deficit\n"); SavedRegs.set(Reg); NumGPRSpills++; CS1Spilled = true; - ExtraCSSpill = true; + assert(!MRI.isReserved(Reg) && "Should not be reserved"); + if (!MRI.isPhysRegUsed(Reg)) + ExtraCSSpill = true; UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg)); if (Reg == ARM::LR) LRSpilled = true; @@ -1887,7 +1901,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, UnspilledCS1GPRs.erase(LRPos); ForceLRSpill = false; - ExtraCSSpill = true; + if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR)) + ExtraCSSpill = true; } // If stack and double are 8-byte aligned and we are spilling an odd number @@ -1905,9 +1920,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, (STI.isTargetWindows() && Reg == ARM::R11) || isARMLowRegister(Reg) || Reg == ARM::LR) { SavedRegs.set(Reg); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) + DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI) << " to make up alignment\n"); - if (!MRI.isReserved(Reg)) + if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg)) ExtraCSSpill = true; break; } @@ -1915,9 +1930,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { unsigned Reg = UnspilledCS2GPRs.front(); SavedRegs.set(Reg); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI) + DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI) << " to make up alignment\n"); - if (!MRI.isReserved(Reg)) + if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg)) ExtraCSSpill = true; } } @@ -1953,11 +1968,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, } } } - if (Extras.size() && NumExtras == 0) { - for (unsigned i = 0, e = Extras.size(); i != e; ++i) { - SavedRegs.set(Extras[i]); + if (NumExtras == 0) { + for (unsigned Reg : Extras) { + SavedRegs.set(Reg); + if (!MRI.isPhysRegUsed(Reg)) + ExtraCSSpill = true; } - } else if (!AFI->isThumb1OnlyFunction()) { + } + if (!ExtraCSSpill && !AFI->isThumb1OnlyFunction()) { // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. assert(RS && "Register scavenging not provided"); @@ -2084,7 +2102,7 @@ void ARMFrameLowering::adjustForSegmentedStacks( // Sadly, this currently doesn't support varargs, platforms other than // android/linux. Note that thumb1/thumb2 are support for android/linux. 
- if (MF.getFunction()->isVarArg()) + if (MF.getFunction().isVarArg()) report_fatal_error("Segmented stacks do not support vararg functions."); if (!ST->isTargetAndroid() && !ST->isTargetLinux()) report_fatal_error("Segmented stacks not supported on this platform."); @@ -2232,7 +2250,7 @@ void ARMFrameLowering::adjustForSegmentedStacks( if (Thumb && ST->isThumb1Only()) { unsigned PCLabelId = ARMFI->createPICLabelUId(); ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create( - MF.getFunction()->getContext(), "__STACK_LIMIT", PCLabelId, 0); + MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0); MachineConstantPool *MCP = MF.getConstantPool(); unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4); diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 21cd78da395c8..1f18e2bf80c46 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -1,4 +1,4 @@ -//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==// +//===- ARMTargetFrameLowering.h - Define frame lowering for ARM -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,18 +6,19 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H #define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include <vector> namespace llvm { - class ARMSubtarget; + +class ARMSubtarget; +class CalleeSavedInfo; +class MachineFunction; class ARMFrameLowering : public TargetFrameLowering { protected: @@ -38,7 +39,7 @@ public: bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, + std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const override; bool noFramePointerElim(const MachineFunction &MF) const override; @@ -62,14 +63,14 @@ public: return true; } - private: +private: void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs, unsigned MIFlags = 0) const; void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, + std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, bool(*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const; @@ -80,6 +81,6 @@ public: MachineBasicBlock::iterator MI) const override; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 0d904ecb62963..f878bf9937a49 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -13,7 +13,7 @@ #include "ARMSubtarget.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" using namespace llvm; static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 
f75dd4de3f96c..8d32510e2004a 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "ARMBaseInstrInfo.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "Utils/ARMBaseInfo.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -22,6 +23,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" @@ -31,7 +33,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" using namespace llvm; @@ -49,11 +50,6 @@ DisableShifterOp("disable-shifter-op", cl::Hidden, /// namespace { -enum AddrMode2Type { - AM2_BASE, // Simple AM2 (+-imm12) - AM2_SHOP // Shifter-op AM2 -}; - class ARMDAGToDAGISel : public SelectionDAGISel { /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. @@ -104,26 +100,6 @@ public: bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); - AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base, - SDValue &Offset, SDValue &Opc); - bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Opc) { - return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE; - } - - bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Opc) { - return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP; - } - - bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset, - SDValue &Opc) { - SelectAddrMode2Worker(N, Base, Offset, Opc); -// return SelectAddrMode2ShOp(N, Base, Offset, Opc); - // This always matches one way or another. - return true; - } - bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { const ConstantSDNode *CN = cast<ConstantSDNode>(N); Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); @@ -753,148 +729,6 @@ bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, return true; } - -//----- - -AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, - SDValue &Base, - SDValue &Offset, - SDValue &Opc) { - if (N.getOpcode() == ISD::MUL && - (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) { - if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { - // X * [3,5,9] -> X + X * [2,4,8] etc. - int RHSC = (int)RHS->getZExtValue(); - if (RHSC & 1) { - RHSC = RHSC & ~1; - ARM_AM::AddrOpc AddSub = ARM_AM::add; - if (RHSC < 0) { - AddSub = ARM_AM::sub; - RHSC = - RHSC; - } - if (isPowerOf2_32(RHSC)) { - unsigned ShAmt = Log2_32(RHSC); - Base = Offset = N.getOperand(0); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, - ARM_AM::lsl), - SDLoc(N), MVT::i32); - return AM2_SHOP; - } - } - } - } - - if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && - // ISD::OR that is equivalent to an ADD. 
- !CurDAG->isBaseWithConstantOffset(N)) { - Base = N; - if (N.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex( - FI, TLI->getPointerTy(CurDAG->getDataLayout())); - } else if (N.getOpcode() == ARMISD::Wrapper && - N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && - N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && - N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { - Base = N.getOperand(0); - } - Offset = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, - ARM_AM::no_shift), - SDLoc(N), MVT::i32); - return AM2_BASE; - } - - // Match simple R +/- imm12 operands. - if (N.getOpcode() != ISD::SUB) { - int RHSC; - if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, - -0x1000+1, 0x1000, RHSC)) { // 12 bits. - Base = N.getOperand(0); - if (Base.getOpcode() == ISD::FrameIndex) { - int FI = cast<FrameIndexSDNode>(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex( - FI, TLI->getPointerTy(CurDAG->getDataLayout())); - } - Offset = CurDAG->getRegister(0, MVT::i32); - - ARM_AM::AddrOpc AddSub = ARM_AM::add; - if (RHSC < 0) { - AddSub = ARM_AM::sub; - RHSC = - RHSC; - } - Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC, - ARM_AM::no_shift), - SDLoc(N), MVT::i32); - return AM2_BASE; - } - } - - if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) { - // Compute R +/- (R << N) and reuse it. - Base = N; - Offset = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, - ARM_AM::no_shift), - SDLoc(N), MVT::i32); - return AM2_BASE; - } - - // Otherwise this is R +/- [possibly shifted] R. - ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub; - ARM_AM::ShiftOpc ShOpcVal = - ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); - unsigned ShAmt = 0; - - Base = N.getOperand(0); - Offset = N.getOperand(1); - - if (ShOpcVal != ARM_AM::no_shift) { - // Check to see if the RHS of the shift is a constant, if not, we can't fold - // it. - if (ConstantSDNode *Sh = - dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { - ShAmt = Sh->getZExtValue(); - if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) - Offset = N.getOperand(1).getOperand(0); - else { - ShAmt = 0; - ShOpcVal = ARM_AM::no_shift; - } - } else { - ShOpcVal = ARM_AM::no_shift; - } - } - - // Try matching (R shl C) + (R). - if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && - !(Subtarget->isLikeA9() || Subtarget->isSwift() || - N.getOperand(0).hasOneUse())) { - ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); - if (ShOpcVal != ARM_AM::no_shift) { - // Check to see if the RHS of the shift is a constant, if not, we can't - // fold it. 
- if (ConstantSDNode *Sh = - dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { - ShAmt = Sh->getZExtValue(); - if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { - Offset = N.getOperand(0).getOperand(0); - Base = N.getOperand(1); - } else { - ShAmt = 0; - ShOpcVal = ARM_AM::no_shift; - } - } else { - ShOpcVal = ARM_AM::no_shift; - } - } - } - - Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), - SDLoc(N), MVT::i32); - return AM2_SHOP; -} - bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc) { unsigned Opcode = Op->getOpcode(); @@ -3764,66 +3598,10 @@ static void getIntOperandsFromRegisterString(StringRef RegString, // which mode it is to be used, e.g. usr. Returns -1 to signify that the string // was invalid. static inline int getBankedRegisterMask(StringRef RegString) { - return StringSwitch<int>(RegString.lower()) - .Case("r8_usr", 0x00) - .Case("r9_usr", 0x01) - .Case("r10_usr", 0x02) - .Case("r11_usr", 0x03) - .Case("r12_usr", 0x04) - .Case("sp_usr", 0x05) - .Case("lr_usr", 0x06) - .Case("r8_fiq", 0x08) - .Case("r9_fiq", 0x09) - .Case("r10_fiq", 0x0a) - .Case("r11_fiq", 0x0b) - .Case("r12_fiq", 0x0c) - .Case("sp_fiq", 0x0d) - .Case("lr_fiq", 0x0e) - .Case("lr_irq", 0x10) - .Case("sp_irq", 0x11) - .Case("lr_svc", 0x12) - .Case("sp_svc", 0x13) - .Case("lr_abt", 0x14) - .Case("sp_abt", 0x15) - .Case("lr_und", 0x16) - .Case("sp_und", 0x17) - .Case("lr_mon", 0x1c) - .Case("sp_mon", 0x1d) - .Case("elr_hyp", 0x1e) - .Case("sp_hyp", 0x1f) - .Case("spsr_fiq", 0x2e) - .Case("spsr_irq", 0x30) - .Case("spsr_svc", 0x32) - .Case("spsr_abt", 0x34) - .Case("spsr_und", 0x36) - .Case("spsr_mon", 0x3c) - .Case("spsr_hyp", 0x3e) - .Default(-1); -} - -// Maps a MClass special register string to its value for use in the -// t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand. -// Returns -1 to signify that the string was invalid. -static inline int getMClassRegisterSYSmValueMask(StringRef RegString) { - return StringSwitch<int>(RegString.lower()) - .Case("apsr", 0x0) - .Case("iapsr", 0x1) - .Case("eapsr", 0x2) - .Case("xpsr", 0x3) - .Case("ipsr", 0x5) - .Case("epsr", 0x6) - .Case("iepsr", 0x7) - .Case("msp", 0x8) - .Case("psp", 0x9) - .Case("primask", 0x10) - .Case("basepri", 0x11) - .Case("basepri_max", 0x12) - .Case("faultmask", 0x13) - .Case("control", 0x14) - .Case("msplim", 0x0a) - .Case("psplim", 0x0b) - .Case("sp", 0x18) - .Default(-1); + auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); + if (!TheReg) + return -1; + return TheReg->Encoding; } // The flags here are common to those allowed for apsr in the A class cores and @@ -3839,58 +3617,15 @@ static inline int getMClassFlagsMask(StringRef Flags) { .Default(-1); } -static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead, - const ARMSubtarget *Subtarget) { - // Ensure that the register (without flags) was a valid M Class special - // register. - int SYSmvalue = getMClassRegisterSYSmValueMask(Reg); - if (SYSmvalue == -1) - return -1; - - // basepri, basepri_max and faultmask are only valid for V7m. 
- if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13) - return -1; - - if (Subtarget->has8MSecExt() && Flags.lower() == "ns") { - Flags = ""; - SYSmvalue |= 0x80; - } - - if (!Subtarget->has8MSecExt() && - (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14)) - return -1; - - if (!Subtarget->hasV8MMainlineOps() && - (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 || - SYSmvalue == 0x93)) - return -1; - - // If it was a read then we won't be expecting flags and so at this point - // we can return the mask. - if (IsRead) { - if (Flags.empty()) - return SYSmvalue; - else - return -1; - } - - // We know we are now handling a write so need to get the mask for the flags. - int Mask = getMClassFlagsMask(Flags); - - // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values - // shouldn't have flags present. - if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty())) - return -1; - - // The _g and _nzcvqg versions are only valid if the DSP extension is - // available. - if (!Subtarget->hasDSP() && (Mask & 0x1)) +// Maps MClass special registers string to its value for use in the +// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. +// Returns -1 to signify that the string was invalid. +static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { + auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); + const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); + if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) return -1; - - // The register was valid so need to put the mask in the correct place - // (the flags need to be in bits 11-10) and combine with the SYSmvalue to - // construct the operand for the instruction node. - return SYSmvalue | Mask << 10; + return (int)(TheReg->Encoding & 0xFFF); // SYSm value } static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { @@ -4032,13 +3767,7 @@ bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ // is an acceptable value, so check that a mask can be constructed from the // string. if (Subtarget->isMClass()) { - StringRef Flags = "", Reg = SpecialReg; - if (Reg.endswith("_ns")) { - Flags = "ns"; - Reg = Reg.drop_back(3); - } - - int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget); + int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); if (SYSmValue == -1) return false; @@ -4149,12 +3878,7 @@ bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ // If the target was M Class then need to validate the special register value // and retrieve the mask for use in the instruction node. if (Subtarget->isMClass()) { - // basepri_max gets split so need to correct Reg and Flags. 
- if (SpecialReg == "basepri_max") { - Reg = SpecialReg; - Flags = ""; - } - int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget); + int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); if (SYSmValue == -1) return false; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 27dda93387b6f..1b4d7ff508489 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1,4 +1,4 @@ -//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// +//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===// // // The LLVM Compiler Infrastructure // @@ -24,6 +24,7 @@ #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" +#include "Utils/ARMBaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -56,6 +57,11 @@ #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" @@ -93,7 +99,6 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <algorithm> @@ -221,19 +226,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, RegInfo = Subtarget->getRegisterInfo(); Itins = Subtarget->getInstrItineraryData(); + setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() && !Subtarget->isTargetWatchOS()) { - const auto &E = Subtarget->getTargetTriple().getEnvironment(); - - bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF || - E == Triple::MuslEABIHF; - // Windows is a special case. Technically, we will replace all of the "GNU" - // calls with calls to MSVCRT if appropriate and adjust the calling - // convention then. - IsHFTarget = IsHFTarget || Subtarget->isTargetWindows(); - + bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard; for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID) setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID), IsHFTarget ? CallingConv::ARM_AAPCS_VFP @@ -801,6 +799,9 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SSUBO, MVT::i32, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); + setOperationAction(ISD::ADDCARRY, MVT::i32, Custom); + setOperationAction(ISD::SUBCARRY, MVT::i32, Custom); + // i64 operation support. 
setOperationAction(ISD::MUL, MVT::i64, Expand); setOperationAction(ISD::MULHU, MVT::i32, Expand); @@ -1562,7 +1563,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, bool isVarArg) const { switch (CC) { default: - llvm_unreachable("Unsupported calling convention"); + report_fatal_error("Unsupported calling convention"); case CallingConv::ARM_AAPCS: case CallingConv::ARM_APCS: case CallingConv::GHC: @@ -1611,7 +1612,7 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, bool isVarArg) const { switch (getEffectiveCallingConv(CC, isVarArg)) { default: - llvm_unreachable("Unsupported calling convention"); + report_fatal_error("Unsupported calling convention"); case CallingConv::ARM_APCS: return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); case CallingConv::ARM_AAPCS: @@ -1634,7 +1635,6 @@ SDValue ARMTargetLowering::LowerCallResult( const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn, SDValue ThisVal) const { - // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, @@ -1732,7 +1732,6 @@ void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue &StackPtr, SmallVectorImpl<SDValue> &MemOpChains, ISD::ArgFlagsTy Flags) const { - SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); unsigned id = Subtarget->isLittle() ? 0 : 1; @@ -1774,7 +1773,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); bool isThisReturn = false; bool isSibCall = false; - auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls"); + auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls"); // Disable tail calls if they're not supported. if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true") @@ -1783,9 +1782,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (isTailCall) { // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, - isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), + isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG); - if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall()) + if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall()) report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); // We don't support GuaranteedTailCallOpt for ARM, only automatically @@ -1982,7 +1981,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, bool isDirect = false; const TargetMachine &TM = getTargetMachine(); - const Module *Mod = MF.getFunction()->getParent(); + const Module *Mod = MF.getFunction().getParent(); const GlobalValue *GV = nullptr; if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) GV = G->getGlobal(); @@ -2032,9 +2031,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // more times in this block, we can improve codesize by calling indirectly // as BLXr has a 16-bit encoding. 
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal(); - auto *BB = CLI.CS->getParent(); + auto *BB = CLI.CS.getParent(); bool PreferIndirect = - Subtarget->isThumb() && MF.getFunction()->optForMinSize() && + Subtarget->isThumb() && MF.getFunction().optForMinSize() && count_if(GV->users(), [&BB](const User *U) { return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB; }) > 2; @@ -2106,7 +2105,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, CallOpc = ARMISD::CALL_NOLINK; else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() && // Emit regular call when code size is the priority - !MF.getFunction()->optForMinSize()) + !MF.getFunction().optForMinSize()) // "mov lr, pc; b _foo" to avoid confusing the RSP CallOpc = ARMISD::CALL_NOLINK; else @@ -2281,18 +2280,25 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - const Function *CallerF = MF.getFunction(); - CallingConv::ID CallerCC = CallerF->getCallingConv(); + const Function &CallerF = MF.getFunction(); + CallingConv::ID CallerCC = CallerF.getCallingConv(); assert(Subtarget->supportsTailCall()); + // Tail calls to function pointers cannot be optimized for Thumb1 if the args + // to the call take up r0-r3. The reason is that there are no legal registers + // left to hold the pointer to the function to be called. + if (Subtarget->isThumb1Only() && Outs.size() >= 4 && + !isa<GlobalAddressSDNode>(Callee.getNode())) + return false; + // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall. // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. - if (CallerF->hasFnAttribute("interrupt")) + if (CallerF.hasFnAttribute("interrupt")) return false; // Also avoid sibcall optimization if either caller or callee uses struct @@ -2404,9 +2410,9 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps, const SDLoc &DL, SelectionDAG &DAG) { const MachineFunction &MF = DAG.getMachineFunction(); - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); - StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); + StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString(); // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset // version of the "preferred return address". These offsets affect the return @@ -2440,7 +2446,6 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const { - // CCValAssign - represent the assignment of the return value to a location. SmallVector<CCValAssign, 16> RVLocs; @@ -2548,7 +2553,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // // M-class CPUs actually use a normal return sequence with a special // (hardware-provided) value in LR, so the normal code path works. 
- if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && + if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") && !Subtarget->isMClass()) { if (Subtarget->isThumb1Only()) report_fatal_error("interrupt attribute is not supported in Thumb1"); @@ -2686,7 +2691,7 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op, auto T = const_cast<Type*>(CP->getType()); auto C = const_cast<Constant*>(CP->getConstVal()); auto M = const_cast<Module*>(DAG.getMachineFunction(). - getFunction()->getParent()); + getFunction().getParent()); auto GV = new GlobalVariable( *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C, Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" + @@ -2768,7 +2773,8 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SDValue ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin"); + assert(Subtarget->isTargetDarwin() && + "This function expects a Darwin target"); SDLoc DL(Op); // First step is to get the address of the actua global symbol. This is where @@ -2794,7 +2800,7 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op, // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be // silly). auto TRI = - getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo(); + getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo(); auto ARI = static_cast<const ARMRegisterInfo *>(TRI); const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction()); @@ -2960,6 +2966,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDValue ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { + GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); + if (DAG.getTarget().Options.EmulatedTLS) + return LowerToTLSEmulatedModel(GA, DAG); + if (Subtarget->isTargetDarwin()) return LowerGlobalTLSAddressDarwin(Op, DAG); @@ -2968,10 +2978,6 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // TODO: implement the "local dynamic" model assert(Subtarget->isTargetELF() && "Only ELF implemented here"); - GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); - if (DAG.getTarget().Options.EmulatedTLS) - return LowerToTLSEmulatedModel(GA, DAG); - TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); switch (model) { @@ -3049,7 +3055,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, // This is a win if the constant is only used in one function (so it doesn't // need to be duplicated) or duplicating the constant wouldn't increase code // size (implying the constant is no larger than 4 bytes). - const Function *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); // We rely on this decision to inline being idemopotent and unrelated to the // use-site. We know that if we inline a variable at one use site, we'll @@ -3107,7 +3113,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, // in multiple functions but it no larger than a pointer. We also check if // GVar has constant (non-ConstantExpr) users. If so, it essentially has its // address taken. 
- if (!allUsersAreInFunction(GVar, F) && + if (!allUsersAreInFunction(GVar, &F) && !(Size <= 4 && allUsersAreInFunctions(GVar))) return SDValue(); @@ -3134,7 +3140,7 @@ static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG, return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); } -static bool isReadOnly(const GlobalValue *GV) { +bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const { if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) GV = GA->getBaseObject(); return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) || @@ -3169,28 +3175,12 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, if (isPositionIndependent()) { bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); - - MachineFunction &MF = DAG.getMachineFunction(); - ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); - EVT PtrVT = getPointerTy(DAG.getDataLayout()); - SDLoc dl(Op); - unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; - ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create( - GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, - UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier, - /*AddCurrentAddress=*/UseGOT_PREL); - SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - SDValue Result = DAG.getLoad( - PtrVT, dl, DAG.getEntryNode(), CPAddr, - MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); - SDValue Chain = Result.getValue(1); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32); - Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); + SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, + UseGOT_PREL ? ARMII::MO_GOT : 0); + SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G); if (UseGOT_PREL) Result = - DAG.getLoad(PtrVT, dl, Chain, Result, + DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, MachinePointerInfo::getGOT(DAG.getMachineFunction())); return Result; } else if (Subtarget->isROPI() && IsRO) { @@ -3332,7 +3322,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, bool IsPositionIndependent = isPositionIndependent(); unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0; ARMConstantPoolValue *CPV = - ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, + ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex, ARMCP::CPLSDA, PCAdj); CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -3608,7 +3598,7 @@ SDValue ARMTargetLowering::LowerFormalArguments( SmallVector<SDValue, 16> ArgValues; SDValue ArgValue; - Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); + Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin(); unsigned CurArgIdx = 0; // Initially ArgRegsSaveSize is zero. 
@@ -3690,7 +3680,6 @@ SDValue ARMTargetLowering::LowerFormalArguments( DAG.getIntPtrConstant(1, dl)); } else ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); - } else { const TargetRegisterClass *RC; @@ -3733,7 +3722,6 @@ SDValue ARMTargetLowering::LowerFormalArguments( } InVals.push_back(ArgValue); - } else { // VA.isRegLoc() // sanity check assert(VA.isMemLoc()); @@ -3853,6 +3841,12 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, break; } } + } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) && + (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) { + // In ARM and Thumb-2, the compare instructions can shift their second + // operand. + CC = ISD::getSetCCSwappedOperands(CC); + std::swap(LHS, RHS); } ARMCC::CondCodes CondCode = IntCCToARMCC(CC); @@ -3952,7 +3946,7 @@ ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG, } SDValue -ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { +ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const { // Let legalize expand this if it isn't a legal type yet. if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) return SDValue(); @@ -3974,6 +3968,66 @@ ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); } +static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry, + SelectionDAG &DAG) { + SDLoc DL(BoolCarry); + EVT CarryVT = BoolCarry.getValueType(); + + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + // This converts the boolean value carry into the carry flag by doing + // ARMISD::ADDC Carry, ~0 + return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32), + BoolCarry, DAG.getConstant(NegOne, DL, CarryVT)); +} + +static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT, + SelectionDAG &DAG) { + SDLoc DL(Flags); + + // Now convert the carry flag into a boolean carry. We do this + // using ARMISD:ADDE 0, 0, Carry + return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), Flags); +} + +SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op, + SelectionDAG &DAG) const { + // Let legalize expand this if it isn't a legal type yet. + if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType())) + return SDValue(); + + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + SDLoc dl(Op); + + EVT VT = Op.getValueType(); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + SDValue Value; + SDValue Overflow; + switch (Op.getOpcode()) { + default: + llvm_unreachable("Unknown overflow instruction!"); + case ISD::UADDO: + Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS); + // Convert the carry flag into a boolean value. + Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); + break; + case ISD::USUBO: { + Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS); + // Convert the carry flag into a boolean value. + Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG); + // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow + // value. So compute 1 - C. 
+ Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(1, dl, MVT::i32), Overflow); + break; + } + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -4518,7 +4572,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI); Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy)); - SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index); if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) { // Thumb2 and ARMv8-M use a two-level jump. That is, it jumps into the jump table // which does another jump to the destination. This also makes it easier @@ -4532,7 +4586,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, MachinePointerInfo::getJumpTable(DAG.getMachineFunction())); Chain = Addr.getValue(1); - Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); + Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI); } else { Addr = @@ -4935,7 +4989,6 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, ARMcc, CCR, CmpLo); - SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue HiBigShift = Opc == ISD::SRA ? DAG.getNode(Opc, dl, VT, ShOpHi, @@ -5370,7 +5423,6 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). if (Opc == ARMISD::VCEQ) { - SDValue AndOp; if (ISD::isBuildVectorAllZeros(Op1.getNode())) AndOp = Op0; @@ -5800,6 +5852,13 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) { return VT == MVT::v8i8 && M.size() == 8; } +static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask, + unsigned Index) { + if (Mask.size() == Elements * 2) + return Index / Elements; + return Mask[Index] == 0 ? 0 : 1; +} + // Checks whether the shuffle mask represents a vector transpose (VTRN) by // checking that pairs of elements in the shuffle mask represent the same index // in each vector, incrementing the expected index by 2 at each step. @@ -5836,10 +5895,7 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only // M[0] is used to determine WhichResult for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult)) @@ -5866,10 +5922,7 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 
0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) || (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult)) @@ -5901,10 +5954,7 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; ++j) { if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult) return false; @@ -5935,10 +5985,7 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ unsigned Half = NumElts / 2; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); for (unsigned j = 0; j < NumElts; j += Half) { unsigned Idx = WhichResult; for (unsigned k = 0; k < Half; ++k) { @@ -5978,10 +6025,7 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -6014,10 +6058,7 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ return false; for (unsigned i = 0; i < M.size(); i += NumElts) { - if (M.size() == NumElts * 2) - WhichResult = i / NumElts; - else - WhichResult = M[i] == 0 ? 0 : 1; + WhichResult = SelectPairHalf(NumElts, M, i); unsigned Idx = WhichResult * NumElts / 2; for (unsigned j = 0; j < NumElts; j += 2) { if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) || @@ -6532,9 +6573,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values /// are assumed to be legal. -bool -ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, - EVT VT) const { +bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const { if (VT.getVectorNumElements() == 4 && (VT.is128BitVector() || VT.is64BitVector())) { unsigned PFIndexes[4]; @@ -7392,6 +7431,53 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } +static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { + SDNode *N = Op.getNode(); + EVT VT = N->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, MVT::i32); + + SDValue Carry = Op.getOperand(2); + EVT CarryVT = Carry.getValueType(); + + SDLoc DL(Op); + + APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits()); + + SDValue Result; + if (Op.getOpcode() == ISD::ADDCARRY) { + // This converts the boolean value carry into the carry flag. + Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); + + // Do the addition proper using the carry flag we wanted. + Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0), + Op.getOperand(1), Carry.getValue(1)); + + // Now convert the carry flag into a boolean value. 
+ Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); + } else { + // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we + // have to invert the carry first. + Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(1, DL, MVT::i32), Carry); + // This converts the boolean value carry into the carry flag. + Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG); + + // Do the subtraction proper using the carry flag we wanted. + Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0), + Op.getOperand(1), Carry.getValue(1)); + + // Now convert the carry flag into a boolean value. + Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG); + // But the carry returned by ARMISD::SUBE is not a borrow as expected + // by ISD::SUBCARRY, so compute 1 - C. + Carry = DAG.getNode(ISD::SUB, DL, MVT::i32, + DAG.getConstant(1, DL, MVT::i32), Carry); + } + + // Return both values. + return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry); +} + SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetDarwin()); @@ -7668,9 +7754,9 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, SDValue InChain = DAG.getEntryNode(); SDValue TCChain = InChain; - const auto *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) && - F->getReturnType() == LCRTy; + F.getReturnType() == LCRTy; if (IsTC) InChain = TCChain; @@ -7686,6 +7772,7 @@ static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget, } SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + DEBUG(dbgs() << "Lowering node: "; Op.dump()); switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG); @@ -7746,11 +7833,14 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::ADDE: case ISD::SUBC: case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); + case ISD::ADDCARRY: + case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); case ISD::SADDO: - case ISD::UADDO: case ISD::SSUBO: + return LowerSignedALUO(Op, DAG); + case ISD::UADDO: case ISD::USUBO: - return LowerXALUO(Op, DAG); + return LowerUnsignedALUO(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); @@ -7864,7 +7954,7 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, MachineRegisterInfo *MRI = &MF->getRegInfo(); MachineConstantPool *MCP = MF->getConstantPool(); ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); - const Function *F = MF->getFunction(); + const Function &F = MF->getFunction(); bool isThumb = Subtarget->isThumb(); bool isThumb2 = Subtarget->isThumb2(); @@ -7872,7 +7962,7 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, unsigned PCLabelId = AFI->createPICLabelUId(); unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8; ARMConstantPoolValue *CPV = - ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); + ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj); unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); const TargetRegisterClass *TRC = isThumb ? 
&ARM::tGPRRegClass @@ -8158,7 +8248,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. @@ -8259,7 +8349,7 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, NumLPads); // MachineConstantPool wants an explicit alignment. @@ -8555,7 +8645,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, UnitSize = 2; } else { // Check whether we can use NEON instructions. - if (!MF->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat) && + if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if ((Align % 16 == 0) && SizeVal >= 16) UnitSize = 16; @@ -8661,7 +8751,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI, .add(predOps(ARMCC::AL)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); - Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); + Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext()); const Constant *C = ConstantInt::get(Int32Ty, LoopSize); // MachineConstantPool wants an explicit alignment. @@ -8797,7 +8887,6 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, switch (TM.getCodeModel()) { case CodeModel::Small: case CodeModel::Medium: - case CodeModel::Default: case CodeModel::Kernel: BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) .add(predOps(ARMCC::AL)) @@ -8809,8 +8898,7 @@ ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI, .addReg(ARM::CPSR, RegState::Implicit | RegState::Define | RegState::Dead); break; - case CodeModel::Large: - case CodeModel::JITDefault: { + case CodeModel::Large: { MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); @@ -8886,8 +8974,11 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Thumb1 post-indexed loads are really just single-register LDMs. case ARM::tLDR_postidx: { + MachineOperand Def(MI.getOperand(1)); + if (TargetRegisterInfo::isPhysicalRegister(Def.getReg())) + Def.setIsRenamable(false); BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD)) - .add(MI.getOperand(1)) // Rn_wb + .add(Def) // Rn_wb .add(MI.getOperand(2)) // Rn .add(MI.getOperand(3)) // PredImm .add(MI.getOperand(4)) // PredReg @@ -9193,7 +9284,7 @@ void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // operand is still set to noreg. If needed, set the optional operand's // register to CPSR, and remove the redundant implicit def. // - // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). + // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR). // Rename pseudo opcodes. 
unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode()); @@ -9612,7 +9703,6 @@ static SDValue findMUL_LOHI(SDValue V) { static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { - if (Subtarget->isThumb()) { if (!Subtarget->hasDSP()) return SDValue(); @@ -9701,11 +9791,11 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, // a S/UMLAL instruction. // UMUL_LOHI // / :lo \ :hi - // / \ [no multiline comment] - // loAdd -> ADDE | - // \ :glue / - // \ / - // ADDC <- hiAdd + // V \ [no multiline comment] + // loAdd -> ADDC | + // \ :carry / + // V V + // ADDE <- hiAdd // assert(AddeNode->getOpcode() == ARMISD::ADDE && "Expect an ADDE"); @@ -9713,7 +9803,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, AddeNode->getOperand(2).getValueType() == MVT::i32 && "ADDE node has the wrong inputs"); - // Check that we have a glued ADDC node. + // Check that we are chained to the right ADDC node. SDNode* AddcNode = AddeNode->getOperand(2).getNode(); if (AddcNode->getOpcode() != ARMISD::ADDC) return SDValue(); @@ -9764,7 +9854,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, SDValue* LoMul = nullptr; SDValue* LowAdd = nullptr; - // Ensure that ADDE is from high result of ISD::SMUL_LOHI. + // Ensure that ADDE is from high result of ISD::xMUL_LOHI. if ((AddeOp0 != MULOp.getValue(1)) && (AddeOp1 != MULOp.getValue(1))) return SDValue(); @@ -9789,6 +9879,12 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddeNode, if (!LoMul) return SDValue(); + // If HiAdd is the same node as ADDC or is a predecessor of ADDC the + // replacement below will create a cycle. + if (AddcNode == HiAdd->getNode() || + AddcNode->isPredecessorOf(HiAdd->getNode())) + return SDValue(); + // Create the merged node. SelectionDAG &DAG = DCI.DAG; @@ -9852,7 +9948,6 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddeNode, AddeNode->getOperand(1).getNode() == UmlalNode) || (AddeNode->getOperand(0).getNode() == UmlalNode && isNullConstant(AddeNode->getOperand(1)))) { - SelectionDAG &DAG = DCI.DAG; SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1), UmlalNode->getOperand(2), AddHi }; @@ -9891,13 +9986,27 @@ static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue PerformAddcSubcCombine(SDNode *N, SelectionDAG &DAG, +static SDValue PerformAddcSubcCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { + SelectionDAG &DAG(DCI.DAG); + + if (N->getOpcode() == ARMISD::ADDC) { + // (ADDC (ADDE 0, 0, C), -1) -> C + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS->getOpcode() == ARMISD::ADDE && + isNullConstant(LHS->getOperand(0)) && + isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) { + return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2)); + } + } + if (Subtarget->isThumb1Only()) { SDValue RHS = N->getOperand(1); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) { int32_t imm = C->getSExtValue(); - if (imm < 0 && imm > INT_MIN) { + if (imm < 0 && imm > std::numeric_limits<int>::min()) { SDLoc DL(N); RHS = DAG.getConstant(-imm, DL, MVT::i32); unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? 
ARMISD::SUBC @@ -9974,6 +10083,102 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, return SDValue(); } +static SDValue PerformSHLSimplify(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *ST) { + // Allow the generic combiner to identify potential bswaps. + if (DCI.isBeforeLegalize()) + return SDValue(); + + // DAG combiner will fold: + // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2 + // Other code patterns that can be also be modified have the following form: + // b + ((a << 1) | 510) + // b + ((a << 1) & 510) + // b + ((a << 1) ^ 510) + // b + ((a << 1) + 510) + + // Many instructions can perform the shift for free, but it requires both + // the operands to be registers. If c1 << c2 is too large, a mov immediate + // instruction will needed. So, unfold back to the original pattern if: + // - if c1 and c2 are small enough that they don't require mov imms. + // - the user(s) of the node can perform an shl + + // No shifted operands for 16-bit instructions. + if (ST->isThumb() && ST->isThumb1Only()) + return SDValue(); + + // Check that all the users could perform the shl themselves. + for (auto U : N->uses()) { + switch(U->getOpcode()) { + default: + return SDValue(); + case ISD::SUB: + case ISD::ADD: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SETCC: + case ARMISD::CMP: + // Check that its not already using a shl. + if (U->getOperand(0).getOpcode() == ISD::SHL || + U->getOperand(1).getOpcode() == ISD::SHL) + return SDValue(); + break; + } + } + + if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR && + N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND) + return SDValue(); + + if (N->getOperand(0).getOpcode() != ISD::SHL) + return SDValue(); + + SDValue SHL = N->getOperand(0); + + auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); + auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1)); + if (!C1ShlC2 || !C2) + return SDValue(); + + DEBUG(dbgs() << "Trying to simplify shl: "; N->dump()); + + APInt C2Int = C2->getAPIntValue(); + APInt C1Int = C1ShlC2->getAPIntValue(); + + // Check that performing a lshr will not lose any information. + APInt Mask = APInt::getHighBitsSet(C2Int.getBitWidth(), + C2Int.getBitWidth() - C2->getZExtValue()); + if ((C1Int & Mask) != C1Int) + return SDValue(); + + // Shift the first constant. + C1Int.lshrInPlace(C2Int); + + // The immediates are encoded as an 8-bit value that can be rotated. + unsigned Zeros = C1Int.countLeadingZeros() + C1Int.countTrailingZeros(); + if (C1Int.getBitWidth() - Zeros > 8) + return SDValue(); + + Zeros = C2Int.countLeadingZeros() + C2Int.countTrailingZeros(); + if (C2Int.getBitWidth() - Zeros > 8) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc dl(N); + SDValue X = SHL.getOperand(0); + SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X, + DAG.getConstant(C1Int, dl, MVT::i32)); + // Shift left to compensate for the lshr of C1Int. + SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1)); + + DAG.ReplaceAllUsesWith(SDValue(N, 0), Res); + return SDValue(N, 0); +} + + /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. /// static SDValue PerformADDCombine(SDNode *N, @@ -9982,6 +10187,10 @@ static SDValue PerformADDCombine(SDNode *N, SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); + // Only works one way, because it needs an immediate operand. 
+ if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) + return Result; + // First try with the default operand order. if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget)) return Result; @@ -10121,7 +10330,6 @@ static SDValue PerformMULCombine(SDNode *N, MVT::i32))); Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i32), Res); - } else return SDValue(); } @@ -10171,6 +10379,9 @@ static SDValue PerformANDCombine(SDNode *N, // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI)) return Result; + + if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) + return Result; } return SDValue(); @@ -10237,95 +10448,17 @@ static SDValue PerformORCombineToSMULWBT(SDNode *OR, return SDValue(OR, 0); } -/// PerformORCombine - Target-specific dag combine xforms for ISD::OR -static SDValue PerformORCombine(SDNode *N, - TargetLowering::DAGCombinerInfo &DCI, - const ARMSubtarget *Subtarget) { - // Attempt to use immediate-form VORR - BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); - SDLoc dl(N); - EVT VT = N->getValueType(0); - SelectionDAG &DAG = DCI.DAG; - - if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) - return SDValue(); - - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN && Subtarget->hasNEON() && - BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { - if (SplatBitSize <= 64) { - EVT VorrVT; - SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, - DAG, dl, VorrVT, VT.is128BitVector(), - OtherModImm); - if (Val.getNode()) { - SDValue Input = - DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); - SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); - return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); - } - } - } - - if (!Subtarget->isThumb1Only()) { - // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) - if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) - return Result; - if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) - return Result; - } - - // The code below optimizes (or (and X, Y), Z). - // The AND operand needs to have a single user to make these optimizations - // profitable. - SDValue N0 = N->getOperand(0); - if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) - return SDValue(); - SDValue N1 = N->getOperand(1); - - // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. - if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && - DAG.getTargetLoweringInfo().isTypeLegal(VT)) { - APInt SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - - APInt SplatBits0, SplatBits1; - BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); - BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); - // Ensure that the second operand of both ands are constants - if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - // Ensure that the bit width of the constants are the same and that - // the splat arguments are logical inverses as per the pattern we - // are trying to simplify. - if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && - SplatBits0 == ~SplatBits1) { - // Canonicalize the vector type to make instruction selection - // simpler. 
- EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, - N0->getOperand(1), - N0->getOperand(0), - N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); - } - } - } - } - - // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when - // reasonable. - +static SDValue PerformORCombineToBFI(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { // BFI is only available on V6T2+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); + EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val @@ -10367,9 +10500,10 @@ static SDValue PerformORCombine(SDNode *N, DAG.getConstant(Val, DL, MVT::i32), DAG.getConstant(Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner than N is + // now dead. + return SDValue(N, 0); } } else if (N1.getOpcode() == ISD::AND) { // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask @@ -10393,9 +10527,10 @@ static SDValue PerformORCombine(SDNode *N, DAG.getConstant(amt, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner than N is + // now dead. + return SDValue(N, 0); } else if (ARM::isBitFieldInvertedMask(~Mask) && (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, @@ -10409,9 +10544,10 @@ static SDValue PerformORCombine(SDNode *N, DAG.getConstant(lsb, DL, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, DAG.getConstant(Mask2, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); - return SDValue(); + // Return value from the original node to inform the combiner than N is + // now dead. + return SDValue(N, 0); } } @@ -10429,13 +10565,112 @@ static SDValue PerformORCombine(SDNode *N, Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), DAG.getConstant(~Mask, DL, MVT::i32)); - // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); + // Return value from the original node to inform the combiner than N is + // now dead. 
+ return SDValue(N, 0); } return SDValue(); } +/// PerformORCombine - Target-specific dag combine xforms for ISD::OR +static SDValue PerformORCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + // Attempt to use immediate-form VORR + BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); + SDLoc dl(N); + EVT VT = N->getValueType(0); + SelectionDAG &DAG = DCI.DAG; + + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (BVN && Subtarget->hasNEON() && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + EVT VorrVT; + SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), SplatBitSize, + DAG, dl, VorrVT, VT.is128BitVector(), + OtherModImm); + if (Val.getNode()) { + SDValue Input = + DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); + SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); + return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); + } + } + } + + if (!Subtarget->isThumb1Only()) { + // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) + if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) + return Result; + if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget)) + return Result; + } + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. + if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && + DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + + // The code below optimizes (or (and X, Y), Z). + // The AND operand needs to have a single user to make these optimizations + // profitable. + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + APInt SplatBits0, SplatBits1; + BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); + BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); + // Ensure that the second operand of both ands are constants + if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + // Ensure that the bit width of the constants are the same and that + // the splat arguments are logical inverses as per the pattern we + // are trying to simplify. + if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection + // simpler. + EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), + N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } + } + } + + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when + // reasonable. 
+ if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget)) + return Res; + } + + if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) + return Result; + + return SDValue(); +} + static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -10449,6 +10684,9 @@ static SDValue PerformXORCombine(SDNode *N, // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI)) return Result; + + if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget)) + return Result; } return SDValue(); @@ -11781,6 +12019,14 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static const APInt *isPowerOf2Constant(SDValue V) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(V); + if (!C) + return nullptr; + const APInt *CV = &C->getAPIntValue(); + return CV->isPowerOf2() ? CV : nullptr; +} + SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const { // If we have a CMOV, OR and AND combination such as: // if (x & CN) @@ -11809,8 +12055,8 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue And = CmpZ->getOperand(0); if (And->getOpcode() != ISD::AND) return SDValue(); - ConstantSDNode *AndC = dyn_cast<ConstantSDNode>(And->getOperand(1)); - if (!AndC || !AndC->getAPIntValue().isPowerOf2()) + const APInt *AndC = isPowerOf2Constant(And->getOperand(1)); + if (!AndC) return SDValue(); SDValue X = And->getOperand(0); @@ -11850,7 +12096,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue V = Y; SDLoc dl(X); EVT VT = X.getValueType(); - unsigned BitInX = AndC->getAPIntValue().logBase2(); + unsigned BitInX = AndC->logBase2(); if (BitInX != 0) { // We must shift X first. @@ -12011,7 +12257,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); case ARMISD::ADDC: - case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI.DAG, Subtarget); + case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget); case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI.DAG, Subtarget); case ARMISD::BFI: return PerformBFICombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget); @@ -12171,11 +12417,11 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - const Function *F = MF.getFunction(); + const Function &F = MF.getFunction(); // See if we can use NEON instructions for this... if ((!IsMemset || ZeroMemset) && Subtarget->hasNEON() && - !F->hasFnAttribute(Attribute::NoImplicitFloat)) { + !F.hasFnAttribute(Attribute::NoImplicitFloat)) { bool Fast; if (Size >= 16 && (memOpAlign(SrcAlign, DstAlign, 16) || @@ -12193,6 +12439,26 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, return MVT::Other; } +// 64-bit integers are split into their high and low parts and held in two +// different registers, so the trunc is free since the low register can just +// be used. 
+bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const { + if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy()) + return false; + unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); + unsigned DestBits = DstTy->getPrimitiveSizeInBits(); + return (SrcBits == 64 && DestBits == 32); +} + +bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const { + if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() || + !DstVT.isInteger()) + return false; + unsigned SrcBits = SrcVT.getSizeInBits(); + unsigned DestBits = DstVT.getSizeInBits(); + return (SrcBits == 64 && DestBits == 32); +} + bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { if (Val.getOpcode() != ISD::LOAD) return false; @@ -12261,7 +12527,6 @@ int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL, return -1; } - static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; @@ -12377,8 +12642,13 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, Scale = Scale & ~1; return Scale == 2 || Scale == 4 || Scale == 8; case MVT::i64: + // FIXME: What are we trying to model here? ldrd doesn't have an r + r + // version in Thumb mode. // r + r - if (((unsigned)AM.HasBaseReg + Scale) <= 2) + if (Scale == 1) + return true; + // r * 2 (this can be lowered to r + r). + if (!AM.HasBaseReg && Scale == 2) return true; return false; case MVT::isVoid: @@ -12392,11 +12662,26 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, } } +bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM, + EVT VT) const { + const int Scale = AM.Scale; + + // Negative scales are not supported in Thumb1. + if (Scale < 0) + return false; + + // Thumb1 addressing modes do not support register scaling excepting the + // following cases: + // 1. Scale == 1 means no scaling. + // 2. Scale == 2 this can be lowered to r + r if there is no base register. + return (Scale == 1) || (!AM.HasBaseReg && Scale == 2); +} + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, - unsigned AS) const { + unsigned AS, Instruction *I) const { EVT VT = getValueType(DL, Ty, true); if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) return false; @@ -12408,10 +12693,6 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, switch (AM.Scale) { case 0: // no scale reg, must be "r+i" or "r", or "i". break; - case 1: - if (Subtarget->isThumb1Only()) - return false; - LLVM_FALLTHROUGH; default: // ARM doesn't support any R+R*scale+imm addr modes. if (AM.BaseOffs) @@ -12420,6 +12701,9 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, if (!VT.isSimple()) return false; + if (Subtarget->isThumb1Only()) + return isLegalT1ScaledAddressingMode(AM, VT); + if (Subtarget->isThumb2()) return isLegalT2ScaledAddressingMode(AM, VT); @@ -12436,8 +12720,11 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL, return isPowerOf2_32(Scale & ~1); case MVT::i16: case MVT::i64: - // r + r - if (((unsigned)AM.HasBaseReg + Scale) <= 2) + // r +/- r + if (Scale == 1 || (AM.HasBaseReg && Scale == -1)) + return true; + // r * 2 (this can be lowered to r + r). 
+ if (!AM.HasBaseReg && Scale == 2) return true; return false; @@ -12685,10 +12972,17 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, case ARMISD::ADDE: case ARMISD::SUBC: case ARMISD::SUBE: - // These nodes' second result is a boolean - if (Op.getResNo() == 0) - break; - Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + // Special cases when we convert a carry to a boolean. + if (Op.getResNo() == 0) { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + // (ADDE 0, 0, C) will give us a single bit. + if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) && + isNullConstant(RHS)) { + Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + return; + } + } break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. @@ -12848,7 +13142,8 @@ ARMTargetLowering::getSingleConstraintMatchWeight( return weight; } -typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; +using RCPair = std::pair<unsigned, const TargetRegisterClass *>; + RCPair ARMTargetLowering::getRegForInlineAsmConstraint( const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { if (Constraint.size() == 1) { @@ -12887,7 +13182,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint( return RCPair(0U, &ARM::QPR_8RegClass); break; case 't': - if (VT == MVT::f32) + if (VT == MVT::f32 || VT == MVT::i32) return RCPair(0U, &ARM::SPRRegClass); break; } @@ -13293,6 +13588,7 @@ bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { /// specified in the intrinsic calls. bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const { switch (Intrinsic) { case Intrinsic::arm_neon_vld1: @@ -13311,9 +13607,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); - Info.vol = false; // volatile loads with NEON intrinsics not supported - Info.readMem = true; - Info.writeMem = false; + // volatile loads with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOLoad; return true; } case Intrinsic::arm_neon_vst1: @@ -13338,9 +13633,8 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.offset = 0; Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); - Info.vol = false; // volatile stores with NEON intrinsics not supported - Info.readMem = false; - Info.writeMem = true; + // volatile stores with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOStore; return true; } case Intrinsic::arm_ldaex: @@ -13352,9 +13646,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlex: @@ -13366,9 +13658,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(1); Info.offset = 0; Info.align = DL.getABITypeAlignment(PtrTy->getElementType()); - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; } case Intrinsic::arm_stlexd: @@ -13378,9 +13668,7 @@ bool 
ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(2); Info.offset = 0; Info.align = 8; - Info.vol = true; - Info.readMem = false; - Info.writeMem = true; + Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile; return true; case Intrinsic::arm_ldaexd: @@ -13390,9 +13678,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.align = 8; - Info.vol = true; - Info.readMem = true; - Info.writeMem = false; + Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; return true; default: @@ -13414,7 +13700,7 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, return true; } -bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, +bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const { if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) return false; @@ -13650,7 +13936,7 @@ Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo"); Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi"); if (!Subtarget->isLittle()) - std::swap (Lo, Hi); + std::swap(Lo, Hi); Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext())); return Builder.CreateCall(Strex, {Lo, Hi, Addr}); } @@ -13772,7 +14058,6 @@ bool ARMTargetLowering::lowerInterleavedLoad( DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs; for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) { - // If we're generating more than one load, compute the base address of // subsequent loads as an offset from the previous. if (LoadCount > 0) @@ -13913,7 +14198,6 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, Intrinsic::arm_neon_vst4}; for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) { - // If we generating more than one store, we compute the base address of // subsequent stores as an offset from the previous. if (StoreCount > 0) @@ -14080,7 +14364,7 @@ void ARMTargetLowering::insertCopiesSplitCSR( // fine for CXX_FAST_TLS since the C++-style TLS access functions should be // nounwind. If we want to generalize this later, we may need to emit // CFI pseudo-instructions. 
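The emitStoreConditional hunk above is only a whitespace fix around std::swap, but the logic it sits in is worth spelling out: for strexd, a 64-bit value is split into two 32-bit halves, and the halves are swapped on big-endian targets. A scalar model of that split, as an illustration only:

// --- illustration only, not part of the patch ---
#include <cassert>
#include <cstdint>
#include <utility>
// Low word is a truncate, high word is a truncate of the value shifted right by 32,
// swapped when the target is big-endian.
static std::pair<uint32_t, uint32_t> splitForStrexd(uint64_t V, bool IsLittle) {
  uint32_t Lo = static_cast<uint32_t>(V);
  uint32_t Hi = static_cast<uint32_t>(V >> 32);
  if (!IsLittle)
    std::swap(Lo, Hi);
  return {Lo, Hi};
}
int main() {
  auto LE = splitForStrexd(0x1122334455667788ULL, /*IsLittle=*/true);
  assert(LE.first == 0x55667788u && LE.second == 0x11223344u);
  auto BE = splitForStrexd(0x1122334455667788ULL, /*IsLittle=*/false);
  assert(BE.first == 0x11223344u && BE.second == 0x55667788u);
  return 0;
}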
- assert(Entry->getParent()->getFunction()->hasFnAttribute( + assert(Entry->getParent()->getFunction().hasFnAttribute( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index f05b142552369..0a1af8d89f9bf 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -1,4 +1,4 @@ -//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===// +//===- ARMISelLowering.h - ARM DAG Lowering Interface -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,22 +19,36 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineValueType.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InlineAsm.h" #include "llvm/Support/CodeGen.h" -#include "llvm/Target/TargetLowering.h" #include <utility> namespace llvm { class ARMSubtarget; +class DataLayout; +class FastISel; +class FunctionLoweringInfo; +class GlobalValue; class InstrItineraryData; +class Instruction; +class MachineBasicBlock; +class MachineInstr; +class SelectionDAG; +class TargetLibraryInfo; +class TargetMachine; +class TargetRegisterInfo; +class VectorType; namespace ARMISD { @@ -264,7 +278,6 @@ class InstrItineraryData; /// ReplaceNodeResults - Replace the results of node with an illegal result /// type with new values built out of custom code. - /// void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, SelectionDAG &DAG) const override; @@ -275,6 +288,8 @@ class InstrItineraryData; return (Kind != ScalarCondVectorVal); } + bool isReadOnly(const GlobalValue *GV) const; + /// getSetCCResultType - Return the value type to use for ISD::SETCC. EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; @@ -306,7 +321,8 @@ class InstrItineraryData; bool MemcpyStrSrc, MachineFunction &MF) const override; - using TargetLowering::isZExtFree; + bool isTruncateFree(Type *SrcTy, Type *DstTy) const override; + bool isTruncateFree(EVT SrcVT, EVT DstVT) const override; bool isZExtFree(SDValue Val, EVT VT2) const override; bool isVectorLoadExtDesirable(SDValue ExtVal) const override; @@ -317,7 +333,8 @@ class InstrItineraryData; /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, - Type *Ty, unsigned AS) const override; + Type *Ty, unsigned AS, + Instruction *I = nullptr) const override; /// getScalingFactorCost - Return the cost of the scaling used in /// addressing mode represented by AM. @@ -328,6 +345,10 @@ class InstrItineraryData; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; + /// \brief Returns true if the addresing mode representing by AM is legal + /// for the Thumb1 target, for a load/store of the specified type. 
+ bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const; + /// isLegalICmpImmediate - Return true if the specified immediate is legal /// icmp immediate, that is the target has icmp instructions which can /// compare a register against the immediate without having to materialize @@ -439,7 +460,7 @@ class InstrItineraryData; Sched::Preference getSchedulingPreference(SDNode *N) const override; bool - isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override; + isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; /// isFPImmLegal - Returns true if the target can instruction select the @@ -449,6 +470,7 @@ class InstrItineraryData; bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + MachineFunction &MF, unsigned Intrinsic) const override; /// \brief Returns true if it is beneficial to convert a load of a constant @@ -458,7 +480,8 @@ class InstrItineraryData; /// Return true if EXTRACT_SUBVECTOR is cheap for this result type /// with this index. - bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override; + bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, + unsigned Index) const override; /// \brief Returns true if an argument of type Ty needs to be passed in a /// contiguous block of registers in calling convention CallConv. @@ -562,7 +585,6 @@ class InstrItineraryData; const InstrItineraryData *Itins; /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created. - /// unsigned ARMPCLabelIndex; // TODO: remove this, and have shouldInsertFencesForAtomic do the proper @@ -576,7 +598,7 @@ class InstrItineraryData; void addQRTypeForNEON(MVT VT); std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const; - typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector; + using RegsToPassVector = SmallVector<std::pair<unsigned, SDValue>, 8>; void PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, @@ -617,7 +639,8 @@ class InstrItineraryData; SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; @@ -669,8 +692,8 @@ class InstrItineraryData; SDValue ThisVal) const; bool supportSplitCSR(MachineFunction *MF) const override { - return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction()->hasFnAttribute(Attribute::NoUnwind); + return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction().hasFnAttribute(Attribute::NoUnwind); } void initializeSplitCSR(MachineBasicBlock *Entry) const override; diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 1bbe7f0d275ed..f7c6c32eb4dc0 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -69,6 +69,7 @@ def NVExtFrm : Format<39>; def NVMulSLFrm : Format<40>; def NVTBLFrm : Format<41>; def DPSoRegImmFrm : Format<42>; +def N3RegCplxFrm : Format<43>; // Misc flags. 
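Among the ARMISelLowering.h declarations above, the two isTruncateFree overrides match the implementations earlier in this diff: only i64 to i32 truncation is reported as free, the usual rationale being that a 64-bit value already occupies a GPR pair and the truncate just reads the low register. A trivial scalar model of that observation (not the backend's code):

// --- illustration only, not part of the patch ---
#include <cassert>
#include <cstdint>
// Truncating i64 to i32 costs no instruction: the result is simply the low 32 bits,
// i.e. the low register of the pair that already holds the value.
static uint32_t truncI64ToI32(uint64_t V) { return static_cast<uint32_t>(V); }
int main() {
  assert(truncI64ToI32(0xDEADBEEFCAFEF00DULL) == 0xCAFEF00Du);
  return 0;
}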
@@ -2513,6 +2514,80 @@ multiclass NEONDTAnyInstAlias<string opc, string asm, dag Result, bit EmitPriori class NEONDataTypeAsmPseudoInst<string opc, string dt, string asm, dag iops> : AsmPseudoInst<!strconcat(opc, dt, "\t", asm), iops>, Requires<[HasNEON]>; +// Extension of NEON 3-vector data processing instructions in coprocessor 8 +// encoding space, introduced in ARMv8.3-A. +class N3VCP8<bits<2> op24_23, bits<2> op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list<dag> pattern> + : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N3RegCplxFrm, itin, opc, + dt, asm, cstr, pattern> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let DecoderNamespace = "VFPV8"; + // These have the same encodings in ARM and Thumb2 + let PostEncoderMethod = ""; + + let Inst{31-25} = 0b1111110; + let Inst{24-23} = op24_23; + let Inst{22} = Vd{4}; + let Inst{21-20} = op21_20; + let Inst{19-16} = Vn{3-0}; + let Inst{15-12} = Vd{3-0}; + let Inst{11-8} = 0b1000; + let Inst{7} = Vn{4}; + let Inst{6} = op6; + let Inst{5} = Vm{4}; + let Inst{4} = op4; + let Inst{3-0} = Vm{3-0}; +} + +// Extension of NEON 2-vector-and-scalar data processing instructions in +// coprocessor 8 encoding space, introduced in ARMv8.3-A. +class N3VLaneCP8<bit op23, bits<2> op21_20, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list<dag> pattern> + : NeonInp<oops, iops, AddrModeNone, IndexModeNone, N3RegCplxFrm, itin, opc, + dt, asm, cstr, pattern> { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let DecoderNamespace = "VFPV8"; + // These have the same encodings in ARM and Thumb2 + let PostEncoderMethod = ""; + + let Inst{31-24} = 0b11111110; + let Inst{23} = op23; + let Inst{22} = Vd{4}; + let Inst{21-20} = op21_20; + let Inst{19-16} = Vn{3-0}; + let Inst{15-12} = Vd{3-0}; + let Inst{11-8} = 0b1000; + let Inst{7} = Vn{4}; + let Inst{6} = op6; + // Bit 5 set by sub-classes + let Inst{4} = op4; + let Inst{3-0} = Vm{3-0}; +} + +// Operand types for complex instructions +class ComplexRotationOperand<int Angle, int Remainder, string Type, string Diag> + : AsmOperandClass { + let PredicateMethod = "isComplexRotation<" # Angle # ", " # Remainder # ">"; + let DiagnosticString = "complex rotation must be " # Diag; + let Name = "ComplexRotation" # Type; +} +def complexrotateop : Operand<i32> { + let ParserMatchClass = ComplexRotationOperand<90, 0, "Even", "0, 90, 180 or 270">; + let PrintMethod = "printComplexRotationOp<90, 0>"; +} +def complexrotateopodd : Operand<i32> { + let ParserMatchClass = ComplexRotationOperand<180, 90, "Odd", "90 or 270">; + let PrintMethod = "printComplexRotationOp<180, 90>"; +} + // Data type suffix token aliases. Implements Table A7-3 in the ARM ARM. 
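The ComplexRotationOperand class above routes the assembler's rotation check through PredicateMethod = "isComplexRotation<Angle, Remainder>". The parser helper itself is not part of this diff; the sketch below assumes the modulo check implied by the two operand definitions (rotations 0/90/180/270 for complexrotateop, 90/270 for complexrotateopodd):

// --- illustration only; the real parser predicate is not in this diff ---
#include <cassert>
// Assumed shape of the check named by PredicateMethod: the rotation must be a
// non-negative value no larger than 270 whose remainder modulo Angle is Remainder.
template <int Angle, int Remainder> static bool isComplexRotation(int Value) {
  return Value >= 0 && Value <= 270 && (Value % Angle) == Remainder;
}
int main() {
  assert(isComplexRotation<90, 0>(0) && isComplexRotation<90, 0>(270));      // vcmla rotations
  assert(!isComplexRotation<90, 0>(45));
  assert(isComplexRotation<180, 90>(90) && isComplexRotation<180, 90>(270)); // vcadd rotations
  assert(!isComplexRotation<180, 90>(0) && !isComplexRotation<180, 90>(180));
  return 0;
}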
def : TokenAlias<".s8", ".i8">; def : TokenAlias<".u8", ".i8">; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c488cd347fe1e..4e13af5963008 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -241,6 +241,8 @@ def HasV8_1a : Predicate<"Subtarget->hasV8_1aOps()">, AssemblerPredicate<"HasV8_1aOps", "armv8.1a">; def HasV8_2a : Predicate<"Subtarget->hasV8_2aOps()">, AssemblerPredicate<"HasV8_2aOps", "armv8.2a">; +def HasV8_3a : Predicate<"Subtarget->hasV8_3aOps()">, + AssemblerPredicate<"HasV8_3aOps", "armv8.3a">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2", "VFP2">; @@ -257,6 +259,8 @@ def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; def HasCrypto : Predicate<"Subtarget->hasCrypto()">, AssemblerPredicate<"FeatureCrypto", "crypto">; +def HasDotProd : Predicate<"Subtarget->hasDotProd()">, + AssemblerPredicate<"FeatureDotProd", "dotprod">; def HasCRC : Predicate<"Subtarget->hasCRC()">, AssemblerPredicate<"FeatureCRC", "crc">; def HasRAS : Predicate<"Subtarget->hasRAS()">, @@ -313,6 +317,8 @@ def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">; def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">; def IsWindows : Predicate<"Subtarget->isTargetWindows()">; def IsNotWindows : Predicate<"!Subtarget->isTargetWindows()">; +def IsReadTPHard : Predicate<"Subtarget->isReadTPHard()">; +def IsReadTPSoft : Predicate<"!Subtarget->isReadTPHard()">; def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, AssemblerPredicate<"FeatureNaClTrap", "NaCl">; def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; @@ -326,6 +332,8 @@ def UseNegativeImmediates : let RecomputePerFunction = 1 in { def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; + def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">; + def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">; } def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; def UseMulOps : Predicate<"Subtarget->useMulOps()">; @@ -454,12 +462,13 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ class ImmAsmOperand<int Low, int High> : AsmOperandClass { let RenderMethod = "addImmOperands"; let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; - let DiagnosticType = "ImmRange" # Low # "_" # High; + let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; } class ImmAsmOperandMinusOne<int Low, int High> : AsmOperandClass { let PredicateMethod = "isImmediate<" # Low # "," # High # ">"; let DiagnosticType = "ImmRange" # Low # "_" # High; + let DiagnosticString = "operand must be an immediate in the range [" # Low # "," # High # "]"; } // Operands that are part of a memory addressing mode. 
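Several operand classes in the hunk above move from a DiagnosticType enum to an inline DiagnosticString assembled with TableGen's "#" paste operator. For ImmAsmOperand<0,15>, for example, the pasted pieces yield the exact message shown to the user; the same concatenation written out in plain C++, purely as an illustration:

// --- illustration only, not part of the patch ---
#include <cassert>
#include <string>
// Mirrors: "operand must be an immediate in the range [" # Low # "," # High # "]"
static std::string immRangeDiag(int Low, int High) {
  return "operand must be an immediate in the range [" + std::to_string(Low) + "," +
         std::to_string(High) + "]";
}
int main() {
  assert(immRangeDiag(0, 15) == "operand must be an immediate in the range [0,15]");
  return 0;
}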
@@ -517,7 +526,10 @@ def reglist : Operand<i32> { def GPRPairOp : RegisterOperand<GPRPair, "printGPRPairOperand">; -def DPRRegListAsmOperand : AsmOperandClass { let Name = "DPRRegList"; } +def DPRRegListAsmOperand : AsmOperandClass { + let Name = "DPRRegList"; + let DiagnosticType = "DPR_RegList"; +} def dpr_reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; let ParserMatchClass = DPRRegListAsmOperand; @@ -525,7 +537,10 @@ def dpr_reglist : Operand<i32> { let DecoderMethod = "DecodeDPRRegListOperand"; } -def SPRRegListAsmOperand : AsmOperandClass { let Name = "SPRRegList"; } +def SPRRegListAsmOperand : AsmOperandClass { + let Name = "SPRRegList"; + let DiagnosticString = "operand must be a list of registers in range [s0, s31]"; +} def spr_reglist : Operand<i32> { let EncoderMethod = "getRegisterListOpValue"; let ParserMatchClass = SPRRegListAsmOperand; @@ -748,7 +763,6 @@ def imm1_31 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm < 32; }]> { /// imm0_15 predicate - Immediate in the range [0,15]. def Imm0_15AsmOperand: ImmAsmOperand<0,15> { let Name = "Imm0_15"; - let DiagnosticType = "ImmRange0_15"; } def imm0_15 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 16; @@ -783,7 +797,6 @@ def imm0_63 : Operand<i32>, ImmLeaf<i32, [{ /// imm0_239 predicate - Immediate in the range [0,239]. def Imm0_239AsmOperand : ImmAsmOperand<0,239> { let Name = "Imm0_239"; - let DiagnosticType = "ImmRange0_239"; } def imm0_239 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 240; }]> { let ParserMatchClass = Imm0_239AsmOperand; @@ -817,6 +830,7 @@ def imm0_65535_neg : Operand<i32>, ImmLeaf<i32, [{ def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; let RenderMethod = "addImmOperands"; + let DiagnosticString = "operand must be an immediate in the range [0,0xffff] or a relocatable expression"; } def imm0_65535_expr : Operand<i32> { @@ -830,7 +844,10 @@ def imm256_65535_expr : Operand<i32> { } /// imm24b - True if the 32-bit immediate is encodable in 24 bits. -def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { let Name = "Imm24bit"; } +def Imm24bitAsmOperand: ImmAsmOperand<0,0xffffff> { + let Name = "Imm24bit"; + let DiagnosticString = "operand must be an immediate in the range [0,0xffffff]"; +} def imm24b : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 0xffffff; }]> { @@ -960,21 +977,6 @@ def postidx_reg : MemOperand { let MIOperandInfo = (ops GPRnopc, i32imm); } - -// addrmode2 := reg +/- imm12 -// := reg +/- reg shop imm -// -// FIXME: addrmode2 should be refactored the rest of the way to always -// use explicit imm vs. reg versions above (addrmode_imm12 and ldst_so_reg). -def AddrMode2AsmOperand : AsmOperandClass { let Name = "AddrMode2"; } -def addrmode2 : MemOperand, - ComplexPattern<i32, 3, "SelectAddrMode2", []> { - let EncoderMethod = "getAddrMode2OpValue"; - let PrintMethod = "printAddrMode2Operand"; - let ParserMatchClass = AddrMode2AsmOperand; - let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); -} - def PostIdxRegShiftedAsmOperand : AsmOperandClass { let Name = "PostIdxRegShifted"; let ParserMethod = "parsePostIdxReg"; @@ -1123,7 +1125,7 @@ class AddrMode6Align : MemOperand, // VLD/VST instructions and checking the alignment is not specified. 
def AddrMode6AlignNoneAsmOperand : AsmOperandClass { let Name = "AlignedMemoryNone"; - let DiagnosticType = "AlignedMemoryRequiresNone"; + let DiagnosticString = "alignment must be omitted"; } def addrmode6alignNone : AddrMode6Align { // The alignment specifier can only be omitted. @@ -1134,7 +1136,7 @@ def addrmode6alignNone : AddrMode6Align { // VLD/VST instructions and checking the alignment value. def AddrMode6Align16AsmOperand : AsmOperandClass { let Name = "AlignedMemory16"; - let DiagnosticType = "AlignedMemoryRequires16"; + let DiagnosticString = "alignment must be 16 or omitted"; } def addrmode6align16 : AddrMode6Align { // The alignment specifier can only be 16 or omitted. @@ -1145,7 +1147,7 @@ def addrmode6align16 : AddrMode6Align { // VLD/VST instructions and checking the alignment value. def AddrMode6Align32AsmOperand : AsmOperandClass { let Name = "AlignedMemory32"; - let DiagnosticType = "AlignedMemoryRequires32"; + let DiagnosticString = "alignment must be 32 or omitted"; } def addrmode6align32 : AddrMode6Align { // The alignment specifier can only be 32 or omitted. @@ -1156,7 +1158,7 @@ def addrmode6align32 : AddrMode6Align { // VLD/VST instructions and checking the alignment value. def AddrMode6Align64AsmOperand : AsmOperandClass { let Name = "AlignedMemory64"; - let DiagnosticType = "AlignedMemoryRequires64"; + let DiagnosticString = "alignment must be 64 or omitted"; } def addrmode6align64 : AddrMode6Align { // The alignment specifier can only be 64 or omitted. @@ -1167,7 +1169,7 @@ def addrmode6align64 : AddrMode6Align { // for VLD/VST instructions and checking the alignment value. def AddrMode6Align64or128AsmOperand : AsmOperandClass { let Name = "AlignedMemory64or128"; - let DiagnosticType = "AlignedMemoryRequires64or128"; + let DiagnosticString = "alignment must be 64, 128 or omitted"; } def addrmode6align64or128 : AddrMode6Align { // The alignment specifier can only be 64, 128 or omitted. @@ -1178,7 +1180,7 @@ def addrmode6align64or128 : AddrMode6Align { // encoding for VLD/VST instructions and checking the alignment value. def AddrMode6Align64or128or256AsmOperand : AsmOperandClass { let Name = "AlignedMemory64or128or256"; - let DiagnosticType = "AlignedMemoryRequires64or128or256"; + let DiagnosticString = "alignment must be 64, 128, 256 or omitted"; } def addrmode6align64or128or256 : AddrMode6Align { // The alignment specifier can only be 64, 128, 256 or omitted. @@ -1209,7 +1211,7 @@ class AddrMode6DupAlign : MemOperand, // VLD-dup instruction and checking the alignment is not specified. def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass { let Name = "DupAlignedMemoryNone"; - let DiagnosticType = "DupAlignedMemoryRequiresNone"; + let DiagnosticString = "alignment must be omitted"; } def addrmode6dupalignNone : AddrMode6DupAlign { // The alignment specifier can only be omitted. @@ -1220,7 +1222,7 @@ def addrmode6dupalignNone : AddrMode6DupAlign { // instruction and checking the alignment value. def AddrMode6dupAlign16AsmOperand : AsmOperandClass { let Name = "DupAlignedMemory16"; - let DiagnosticType = "DupAlignedMemoryRequires16"; + let DiagnosticString = "alignment must be 16 or omitted"; } def addrmode6dupalign16 : AddrMode6DupAlign { // The alignment specifier can only be 16 or omitted. @@ -1231,7 +1233,7 @@ def addrmode6dupalign16 : AddrMode6DupAlign { // instruction and checking the alignment value. 
def AddrMode6dupAlign32AsmOperand : AsmOperandClass { let Name = "DupAlignedMemory32"; - let DiagnosticType = "DupAlignedMemoryRequires32"; + let DiagnosticString = "alignment must be 32 or omitted"; } def addrmode6dupalign32 : AddrMode6DupAlign { // The alignment specifier can only be 32 or omitted. @@ -1242,7 +1244,7 @@ def addrmode6dupalign32 : AddrMode6DupAlign { // instructions and checking the alignment value. def AddrMode6dupAlign64AsmOperand : AsmOperandClass { let Name = "DupAlignedMemory64"; - let DiagnosticType = "DupAlignedMemoryRequires64"; + let DiagnosticString = "alignment must be 64 or omitted"; } def addrmode6dupalign64 : AddrMode6DupAlign { // The alignment specifier can only be 64 or omitted. @@ -1253,7 +1255,7 @@ def addrmode6dupalign64 : AddrMode6DupAlign { // for VLD instructions and checking the alignment value. def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass { let Name = "DupAlignedMemory64or128"; - let DiagnosticType = "DupAlignedMemoryRequires64or128"; + let DiagnosticString = "alignment must be 64, 128 or omitted"; } def addrmode6dupalign64or128 : AddrMode6DupAlign { // The alignment specifier can only be 64, 128 or omitted. @@ -2369,12 +2371,15 @@ let isBranch = 1, isTerminator = 1 in { 0, IIC_Br, [(ARMbrjt GPR:$target, tjumptable:$jt)]>, Sched<[WriteBr]>; - // FIXME: This shouldn't use the generic "addrmode2," but rather be split - // into i12 and rs suffixed versions. - def BR_JTm : ARMPseudoInst<(outs), - (ins addrmode2:$target, i32imm:$jt), + def BR_JTm_i12 : ARMPseudoInst<(outs), + (ins addrmode_imm12:$target, i32imm:$jt), 0, IIC_Br, - [(ARMbrjt (i32 (load addrmode2:$target)), + [(ARMbrjt (i32 (load addrmode_imm12:$target)), + tjumptable:$jt)]>, Sched<[WriteBrTbl]>; + def BR_JTm_rs : ARMPseudoInst<(outs), + (ins ldst_so_reg:$target, i32imm:$jt), + 0, IIC_Br, + [(ARMbrjt (i32 (load ldst_so_reg:$target)), tjumptable:$jt)]>, Sched<[WriteBrTbl]>; def BR_JTadd : ARMPseudoInst<(outs), (ins GPR:$target, GPR:$idx, i32imm:$jt), @@ -5033,6 +5038,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{15-12} = CRd; let Inst{19-16} = CRn; let Inst{23-20} = opc1; + + let DecoderNamespace = "CoProc"; } def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, @@ -5056,6 +5063,8 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{15-12} = CRd; let Inst{19-16} = CRn; let Inst{23-20} = opc1; + + let DecoderNamespace = "CoProc"; } class ACI<dag oops, dag iops, string opc, string asm, @@ -5071,6 +5080,8 @@ class ACInoP<dag oops, dag iops, string opc, string asm, let Inst{31-28} = 0b1111; let Inst{27-25} = 0b110; } + +let DecoderNamespace = "CoProc" in { multiclass LdStCop<bit load, bit Dbit, string asm, list<dag> pattern> { def _OFFSET : ACI<(outs), (ins p_imm:$cop, c_imm:$CRd, addrmode5:$addr), asm, "\t$cop, $CRd, $addr", pattern> { @@ -5224,6 +5235,8 @@ defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5 defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>; +} // DecoderNamespace = "CoProc" + //===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register. 
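Earlier in this hunk the catch-all addrmode2 jump-table branch is split into BR_JTm_i12 (immediate offset) and BR_JTm_rs (register-shifted offset). The address arithmetic the two forms encode is just base plus immediate versus base plus scaled index; a small model, for illustration only:

// --- illustration only, not part of the patch ---
#include <cassert>
#include <cstdint>
// addrmode_imm12: [Rn, #imm]        -> base + imm
// ldst_so_reg:    [Rn, Rm, lsl #s]  -> base + (index << s)
static uint32_t addrImm12(uint32_t Base, uint32_t Imm) { return Base + Imm; }
static uint32_t addrSoReg(uint32_t Base, uint32_t Index, unsigned Shift) {
  return Base + (Index << Shift);
}
int main() {
  assert(addrImm12(0x1000, 8) == 0x1008);
  assert(addrSoReg(0x1000, 3, 2) == 0x100C); // entry 3 of a table of 4-byte entries
  return 0;
}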
// @@ -5248,6 +5261,8 @@ class MovRCopro<string opc, bit direction, dag oops, dag iops, let Inst{7-5} = opc2; let Inst{3-0} = CRm; let Inst{19-16} = CRn; + + let DecoderNamespace = "CoProc"; } def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, @@ -5292,6 +5307,8 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops, let Inst{7-5} = opc2; let Inst{3-0} = CRm; let Inst{19-16} = CRn; + + let DecoderNamespace = "CoProc"; } def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, @@ -5519,9 +5536,14 @@ let usesCustomInserter = 1, Defs = [CPSR] in let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in { def TPsoft : ARMPseudoInst<(outs), (ins), 4, IIC_Br, - [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>; + [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>, + Requires<[IsARM, IsReadTPSoft]>; } +// Reading thread pointer from coprocessor register +def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 3)>, + Requires<[IsARM, IsReadTPHard]>; + //===----------------------------------------------------------------------===// // SJLJ Exception handling intrinsics // eh_sjlj_setjmp() is an instruction sequence to store the return @@ -5618,26 +5640,26 @@ let isReMaterializable = 1 in { def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), IIC_iMOVix2addpc, [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, - Requires<[IsARM, UseMovt]>; + Requires<[IsARM, UseMovtInPic]>; def LDRLIT_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), IIC_iLoadiALU, [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, - Requires<[IsARM, DontUseMovt]>; + Requires<[IsARM, DontUseMovtInPic]>; let AddedComplexity = 10 in def LDRLIT_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), NoItinerary, [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, - Requires<[IsARM, DontUseMovt]>; + Requires<[IsARM, DontUseMovtInPic]>; let AddedComplexity = 10 in def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr), IIC_iMOVix2ld, [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>, - Requires<[IsARM, UseMovt]>; + Requires<[IsARM, UseMovtInPic]>; } // isReMaterializable // The many different faces of TLS access. 
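The ARMPat added above selects (MRC 15, 0, 13, 0, 3) for ARMthread_pointer when read-tp-hard is enabled, so the thread pointer is read straight from the CP15 thread ID register (TPIDRURO) instead of going through the __aeabi_read_tp call that TPsoft expands to. The equivalent read in GNU C looks like the sketch below; it only compiles for 32-bit ARM and is an illustration, not patch code:

// --- illustration only, not part of the patch ---
#if defined(__arm__)
// Reads TPIDRURO (CP15 c13/c0, opc2 = 3), the register the pattern above selects.
static inline void *readThreadPointerHard(void) {
  void *TP;
  __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(TP));
  return TP;
}
int main() {
  void *TP = readThreadPointerHard();
  (void)TP;
  return 0;
}
#else
int main() { return 0; } // nothing to demonstrate on non-ARM hosts
#endif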
@@ -5650,15 +5672,15 @@ def : Pat<(ARMWrapper tglobaltlsaddr:$src), Requires<[IsARM, DontUseMovt]>; def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), - (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>; + (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovtInPic]>; def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, - Requires<[IsARM, DontUseMovt]>; + Requires<[IsARM, DontUseMovtInPic]>; let AddedComplexity = 10 in def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)), (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>, - Requires<[IsARM, UseMovt]>; + Requires<[IsARM, UseMovtInPic]>; // ConstantPool, GlobalAddress, and JumpTable diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 858136a820784..cd67dded5853f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -108,6 +108,7 @@ def nImmSplatI64 : Operand<i32> { def VectorIndex8Operand : AsmOperandClass { let Name = "VectorIndex8"; } def VectorIndex16Operand : AsmOperandClass { let Name = "VectorIndex16"; } def VectorIndex32Operand : AsmOperandClass { let Name = "VectorIndex32"; } +def VectorIndex64Operand : AsmOperandClass { let Name = "VectorIndex64"; } def VectorIndex8 : Operand<i32>, ImmLeaf<i32, [{ return ((uint64_t)Imm) < 8; }]> { @@ -129,6 +130,13 @@ def VectorIndex32 : Operand<i32>, ImmLeaf<i32, [{ let PrintMethod = "printVectorIndex"; let MIOperandInfo = (ops i32imm); } +def VectorIndex64 : Operand<i32>, ImmLeaf<i32, [{ + return ((uint64_t)Imm) < 1; +}]> { + let ParserMatchClass = VectorIndex64Operand; + let PrintMethod = "printVectorIndex"; + let MIOperandInfo = (ops i32imm); +} // Register list of one D register. def VecListOneDAsmOperand : AsmOperandClass { @@ -1111,6 +1119,22 @@ def : Pat<(vector_insert (v4f32 QPR:$src), (f32 (load addrmode6:$addr)), imm:$lane), (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; +// A 64-bit subvector insert to the first 128-bit vector position +// is a subregister copy that needs no instruction. +def : Pat<(insert_subvector undef, (v1i64 DPR:$src), (i32 0)), + (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector undef, (v2i32 DPR:$src), (i32 0)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector undef, (v2f32 DPR:$src), (i32 0)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector undef, (v4i16 DPR:$src), (i32 0)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector undef, (v4f16 DPR:$src), (i32 0)), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(insert_subvector (v16i8 undef), (v8i8 DPR:$src), (i32 0)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; + + let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { // ...with address register writeback: @@ -4672,6 +4696,167 @@ def : Pat<(v4f32 (fma (fneg QPR:$Vn), QPR:$Vm, QPR:$src1)), (VFMSfq QPR:$src1, QPR:$Vn, QPR:$Vm)>, Requires<[HasVFP4]>; +// ARMv8.2a dot product instructions. +// We put them in the VFPV8 decoder namespace because the ARM and Thumb +// encodings are the same and thus no further bit twiddling is necessary +// in the disassembler. 
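The block that follows defines the ARMv8.2-A dot product instructions gated by the new HasDotProd predicate (VUDOT/VSDOT in D- and Q-register forms plus by-lane variants). Semantically, each 32-bit destination lane accumulates a four-way 8-bit dot product; a scalar reference for one unsigned lane, as an illustration only:

// --- illustration only, not part of the patch ---
#include <cassert>
#include <cstdint>
// One 32-bit lane of vudot.u8: accumulate the dot product of four unsigned bytes
// from each source into the existing 32-bit accumulator lane.
static uint32_t udotLane(uint32_t Acc, const uint8_t A[4], const uint8_t B[4]) {
  for (int I = 0; I < 4; ++I)
    Acc += static_cast<uint32_t>(A[I]) * static_cast<uint32_t>(B[I]);
  return Acc;
}
int main() {
  const uint8_t A[4] = {1, 2, 3, 4}, B[4] = {10, 20, 30, 40};
  assert(udotLane(0, A, B) == 300); // 1*10 + 2*20 + 3*30 + 4*40
  return 0;
}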
+let Predicates = [HasDotProd], DecoderNamespace = "VFPV8" in { + +def VUDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b1, + (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>; +def VSDOTD : N3Vnp<0b11000, 0b10, 0b1101, 0b0, 0b0, + (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>; +def VUDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b1, + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VDOTPROD, "vudot", "u8", []>; +def VSDOTQ : N3Vnp<0b11000, 0b10, 0b1101, 0b1, 0b0, + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VDOTPROD, "vsdot", "s8", []>; + +// Indexed dot product instructions: +class DOTI<string opc, string dt, bit Q, bit U, RegisterClass Ty> : + N3Vnp<0b11100, 0b10, 0b1101, Q, U, + (outs Ty:$Vd), (ins Ty:$Vn, DPR_VFP2:$Vm, VectorIndex32:$lane), + N3RegFrm, IIC_VDOTPROD, opc, dt, []> { + bit lane; + let Inst{5} = lane; + let AsmString = !strconcat(opc, ".", dt, "\t$Vd, $Vn, $Vm$lane"); +} + +def VUDOTDI : DOTI<"vudot", "u8", 0b0, 0b1, DPR>; +def VSDOTDI : DOTI<"vsdot", "s8", 0b0, 0b0, DPR>; +def VUDOTQI : DOTI<"vudot", "u8", 0b1, 0b1, QPR>; +def VSDOTQI : DOTI<"vsdot", "s8", 0b1, 0b0, QPR>; + +} // HasDotProd + +// ARMv8.3 complex operations +class BaseN3VCP8ComplexTied<bit op21, bit op4, bit s, bit q, + InstrItinClass itin, dag oops, dag iops, + string opc, string dt, list<dag> pattern> + : N3VCP8<{?,?}, {op21,s}, q, op4, oops, + iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "$src1 = $Vd", pattern>{ + bits<2> rot; + let Inst{24-23} = rot; +} + +class BaseN3VCP8ComplexOdd<bit op23, bit op21, bit op4, bit s, bit q, + InstrItinClass itin, dag oops, dag iops, string opc, + string dt, list<dag> pattern> + : N3VCP8<{?,op23}, {op21,s}, q, op4, oops, + iops, itin, opc, dt, "$Vd, $Vn, $Vm, $rot", "", pattern> { + bits<1> rot; + let Inst{24} = rot; +} + +class BaseN3VCP8ComplexTiedLane32<bit op4, bit s, bit q, InstrItinClass itin, + dag oops, dag iops, string opc, string dt, + list<dag> pattern> + : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, + "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> { + bits<2> rot; + bit lane; + + let Inst{21-20} = rot; + let Inst{5} = lane; +} + +class BaseN3VCP8ComplexTiedLane64<bit op4, bit s, bit q, InstrItinClass itin, + dag oops, dag iops, string opc, string dt, + list<dag> pattern> + : N3VLaneCP8<s, {?,?}, q, op4, oops, iops, itin, opc, dt, + "$Vd, $Vn, $Vm$lane, $rot", "$src1 = $Vd", pattern> { + bits<2> rot; + bit lane; + + let Inst{21-20} = rot; + let Inst{5} = Vm{4}; + // This is needed because the lane operand does not have any bits in the + // encoding (it only has one possible value), so we need to manually set it + // to it's default value. 
+ let DecoderMethod = "DecodeNEONComplexLane64Instruction"; +} + +multiclass N3VCP8ComplexTied<bit op21, bit op4, + string OpcodeStr, SDPatternOperator Op> { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 0, IIC_VMACD, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), + OpcodeStr, "f16", []>; + def v8f16 : BaseN3VCP8ComplexTied<op21, op4, 0, 1, IIC_VMACQ, (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), + OpcodeStr, "f16", []>; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 0, IIC_VMACD, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR:$Vm, complexrotateop:$rot), + OpcodeStr, "f32", []>; + def v4f32 : BaseN3VCP8ComplexTied<op21, op4, 1, 1, IIC_VMACQ, (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, QPR:$Vm, complexrotateop:$rot), + OpcodeStr, "f32", []>; + } +} + +multiclass N3VCP8ComplexOdd<bit op23, bit op21, bit op4, + string OpcodeStr, SDPatternOperator Op> { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 0, IIC_VMACD, + (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot), + OpcodeStr, "f16", []>; + def v8f16 : BaseN3VCP8ComplexOdd<op23, op21, op4, 0, 1, IIC_VMACQ, + (outs QPR:$Vd), + (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), + OpcodeStr, "f16", []>; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 0, IIC_VMACD, + (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm, complexrotateopodd:$rot), + OpcodeStr, "f32", []>; + def v4f32 : BaseN3VCP8ComplexOdd<op23, op21, op4, 1, 1, IIC_VMACQ, + (outs QPR:$Vd), + (ins QPR:$Vn, QPR:$Vm, complexrotateopodd:$rot), + OpcodeStr, "f32", []>; + } +} + +// These instructions index by pairs of lanes, so the VectorIndexes are twice +// as wide as the data types. +multiclass N3VCP8ComplexTiedLane<bit op4, string OpcodeStr, + SDPatternOperator Op> { + let Predicates = [HasNEON,HasV8_3a,HasFullFP16] in { + def v4f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 0, IIC_VMACD, + (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, + VectorIndex32:$lane, complexrotateop:$rot), + OpcodeStr, "f16", []>; + def v8f16_indexed : BaseN3VCP8ComplexTiedLane32<op4, 0, 1, IIC_VMACQ, + (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, + VectorIndex32:$lane, complexrotateop:$rot), + OpcodeStr, "f16", []>; + } + let Predicates = [HasNEON,HasV8_3a] in { + def v2f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 0, IIC_VMACD, + (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR:$Vm, VectorIndex64:$lane, + complexrotateop:$rot), + OpcodeStr, "f32", []>; + def v4f32_indexed : BaseN3VCP8ComplexTiedLane64<op4, 1, 1, IIC_VMACQ, + (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, DPR:$Vm, VectorIndex64:$lane, + complexrotateop:$rot), + OpcodeStr, "f32", []>; + } +} + +defm VCMLA : N3VCP8ComplexTied<1, 0, "vcmla", null_frag>; +defm VCADD : N3VCP8ComplexOdd<1, 0, 0, "vcadd", null_frag>; +defm VCMLA : N3VCP8ComplexTiedLane<0, "vcmla", null_frag>; + // Vector Subtract Operations. 
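The VCMLA/VCADD multiclasses above carry only null_frag patterns at this point (assembly and disassembly support), so the patch itself says nothing about their arithmetic. As a reading aid: a VCMLA with rotation 0 followed by one with rotation 90 accumulates a full complex product, which is why the even rotation operand allows 0/90/180/270. The model below reflects the v8.3-A definition as commonly described and is an assumption, not backend code:

// --- illustration only; the rotation semantics here are an assumption, not patch code ---
#include <cassert>
#include <complex>
struct Cplx { float Re, Im; };
// One (real, imag) element pair of VCMLA: multiply-accumulate with the second
// operand pair rotated by 0, 90, 180 or 270 degrees before the multiply.
static void vcmla(Cplx &Acc, Cplx A, Cplx B, int Rot) {
  switch (Rot) {
  case 0:   Acc.Re += A.Re * B.Re; Acc.Im += A.Re * B.Im; break;
  case 90:  Acc.Re -= A.Im * B.Im; Acc.Im += A.Im * B.Re; break;
  case 180: Acc.Re -= A.Re * B.Re; Acc.Im -= A.Re * B.Im; break;
  case 270: Acc.Re += A.Im * B.Im; Acc.Im -= A.Im * B.Re; break;
  }
}
int main() {
  Cplx Acc{0, 0};
  vcmla(Acc, {1, 2}, {3, 4}, 0);  // rotation 0 ...
  vcmla(Acc, {1, 2}, {3, 4}, 90); // ... plus rotation 90 gives the full product
  std::complex<float> Ref = std::complex<float>(1, 2) * std::complex<float>(3, 4);
  assert(Acc.Re == Ref.real() && Acc.Im == Ref.imag()); // (1+2i)*(3+4i) = -5+10i
  return 0;
}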
// VSUB : Vector Subtract (integer and floating-point) @@ -5477,6 +5662,12 @@ def : Pat<(v4i32 (NEONvshl (sext (v4i16 DPR:$Rn)), (i32 16))), (VSHLLi16 DPR:$Rn, 16)>; def : Pat<(v2i64 (NEONvshl (sext (v2i32 DPR:$Rn)), (i32 32))), (VSHLLi32 DPR:$Rn, 32)>; +def : Pat<(v8i16 (NEONvshl (anyext (v8i8 DPR:$Rn)), (i32 8))), + (VSHLLi8 DPR:$Rn, 8)>; +def : Pat<(v4i32 (NEONvshl (anyext (v4i16 DPR:$Rn)), (i32 16))), + (VSHLLi16 DPR:$Rn, 16)>; +def : Pat<(v2i64 (NEONvshl (anyext (v2i32 DPR:$Rn)), (i32 32))), + (VSHLLi32 DPR:$Rn, 32)>; // VSHRN : Vector Shift Right and Narrow defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 891a8f482f0a0..c2bcc087e077a 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -338,7 +338,7 @@ def tHLT : T1I<(outs), (ins imm0_63:$val), NoItinerary, "hlt\t$val", } def tSETEND : T1I<(outs), (ins setend_op:$end), NoItinerary, "setend\t$end", - []>, T1Encoding<0b101101>, Requires<[IsNotMClass]>, Deprecated<HasV8Ops> { + []>, T1Encoding<0b101101>, Requires<[IsThumb, IsNotMClass]>, Deprecated<HasV8Ops> { bits<1> end; // A8.6.156 let Inst{9-5} = 0b10010; @@ -997,6 +997,9 @@ let isAdd = 1 in { } } +def : tInstAlias <"add${s}${p} $Rdn, $Rm", + (tADDrr tGPR:$Rdn,s_cc_out:$s, tGPR:$Rdn, tGPR:$Rm, pred:$p)>; + def : tInstSubst<"sub${s}${p} $rd, $rn, $imm", (tADDi3 tGPR:$rd, s_cc_out:$s, tGPR:$rn, mod_imm1_7_neg:$imm, pred:$p)>; def : tInstSubst<"sub${s}${p} $rdn, $imm", @@ -1286,6 +1289,9 @@ def tSUBrr : // A8.6.212 [(set tGPR:$Rd, (sub tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>; +def : tInstAlias <"sub${s}${p} $Rdn, $Rm", + (tSUBrr tGPR:$Rdn,s_cc_out:$s, tGPR:$Rdn, tGPR:$Rm, pred:$p)>; + /// Similar to the above except these set the 's' bit so the /// instruction modifies the CPSR register. /// @@ -1503,7 +1509,7 @@ def tLDRLIT_ga_pcrel : PseudoInst<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadiALU, [(set tGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, - Requires<[IsThumb, DontUseMovt]>; + Requires<[IsThumb, DontUseMovtInPic]>; def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src), IIC_iLoad_i, @@ -1514,7 +1520,7 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src), // TLS globals def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr), (tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>, - Requires<[IsThumb, DontUseMovt]>; + Requires<[IsThumb, DontUseMovtInPic]>; def : Pat<(ARMWrapper tglobaltlsaddr:$addr), (tLDRLIT_ga_abs tglobaltlsaddr:$addr)>, Requires<[IsThumb, DontUseMovt]>; @@ -1665,13 +1671,6 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def : InstAlias<"nop", (tMOVr R8, R8, 14, 0), 0>, Requires<[IsThumb, IsThumb1Only]>; -// For round-trip assembly/disassembly, we have to handle a CPS instruction -// without any iflags. That's not, strictly speaking, valid syntax, but it's -// a useful extension and assembles to defined behaviour (the insn does -// nothing). 
-def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; -def : tInstAlias<"cps$imod", (tCPS imod_op:$imod, 0)>; - // "neg" is and alias for "rsb rd, rn, #0" def : tInstAlias<"neg${s}${p} $Rd, $Rm", (tRSB tGPR:$Rd, s_cc_out:$s, tGPR:$Rm, pred:$p)>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 42eac12e457b2..670ed127da7e9 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3843,13 +3843,13 @@ let isReMaterializable = 1 in { def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr), IIC_iMOVix2addpc, [(set rGPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>, - Requires<[IsThumb, HasV8MBaseline, UseMovt]>; + Requires<[IsThumb, HasV8MBaseline, UseMovtInPic]>; } def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst), (t2MOV_ga_pcrel tglobaltlsaddr:$dst)>, - Requires<[IsThumb2, UseMovt]>; + Requires<[IsThumb2, UseMovtInPic]>; def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst), (t2MOVi32imm tglobaltlsaddr:$dst)>, Requires<[IsThumb2, UseMovt]>; @@ -3964,6 +3964,7 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag> } } +let DecoderNamespace = "Thumb2CoProc" in { defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; @@ -3973,6 +3974,7 @@ defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc imm:$cop, imm:$CRd, defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl imm:$cop, imm:$CRd, addrmode5:$addr)]>; defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l imm:$cop, imm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>; +} //===----------------------------------------------------------------------===// @@ -4125,6 +4127,8 @@ class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops, let Inst{7-5} = opc2; let Inst{3-0} = CRm; let Inst{19-16} = CRn; + + let DecoderNamespace = "Thumb2CoProc"; } class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops, @@ -4145,6 +4149,8 @@ class t2MovRRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops, let Inst{11-8} = cop; let Inst{7-4} = opc1; let Inst{3-0} = CRm; + + let DecoderNamespace = "Thumb2CoProc"; } /* from ARM core register to coprocessor */ @@ -4243,6 +4249,7 @@ def t2CDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{23-20} = opc1; let Predicates = [IsThumb2, PreV8]; + let DecoderNamespace = "Thumb2CoProc"; } def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, @@ -4268,6 +4275,7 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{23-20} = opc1; let Predicates = [IsThumb2, PreV8]; + let DecoderNamespace = "Thumb2CoProc"; } diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 5d887c4fcbf24..22e157a7480b5 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -255,28 +255,6 @@ def VLSTM : AXSI4<(outs), (ins GPRnopc:$Rn, pred:$p), IndexModeNone, let mayStore = 1; } - -// FLDM/FSTM - Load / Store multiple single / double precision registers for -// pre-ARMv6 cores. -// These instructions are deprecated! 
-def : VFP2MnemonicAlias<"fldmias", "vldmia">; -def : VFP2MnemonicAlias<"fldmdbs", "vldmdb">; -def : VFP2MnemonicAlias<"fldmeas", "vldmdb">; -def : VFP2MnemonicAlias<"fldmfds", "vldmia">; -def : VFP2MnemonicAlias<"fldmiad", "vldmia">; -def : VFP2MnemonicAlias<"fldmdbd", "vldmdb">; -def : VFP2MnemonicAlias<"fldmead", "vldmdb">; -def : VFP2MnemonicAlias<"fldmfdd", "vldmia">; - -def : VFP2MnemonicAlias<"fstmias", "vstmia">; -def : VFP2MnemonicAlias<"fstmdbs", "vstmdb">; -def : VFP2MnemonicAlias<"fstmeas", "vstmia">; -def : VFP2MnemonicAlias<"fstmfds", "vstmdb">; -def : VFP2MnemonicAlias<"fstmiad", "vstmia">; -def : VFP2MnemonicAlias<"fstmdbd", "vstmdb">; -def : VFP2MnemonicAlias<"fstmead", "vstmia">; -def : VFP2MnemonicAlias<"fstmfdd", "vstmdb">; - def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r), 0>, Requires<[HasVFP2]>; def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r), 0>, @@ -297,6 +275,8 @@ defm : VFPDTAnyInstAlias<"vpop${p}", "$r", // FLDMX, FSTMX - Load and store multiple unknown precision registers for // pre-armv6 cores. // These instruction are deprecated so we don't want them to get selected. +// However, there is no UAL syntax for them, so we keep them around for +// (dis)assembly only. multiclass vfp_ldstx_mult<string asm, bit L_bit> { // Unknown precision def XIA : @@ -1561,6 +1541,15 @@ def VTOUIRH : AVConv1IsH_Encode<0b11101, 0b11, 0b1100, 0b1001, } } +// v8.3-a Javascript Convert to Signed fixed-point +def VJCVT : AVConv1IsD_Encode<0b11101, 0b11, 0b1001, 0b1011, + (outs SPR:$Sd), (ins DPR:$Dm), + IIC_fpCVTDI, "vjcvt", ".s32.f64\t$Sd, $Dm", + []>, + Requires<[HasFPARMv8, HasV8_3a]> { + let Inst{7} = 1; // Z bit +} + // Convert between floating-point and fixed-point // Data type for fixed-point naming convention: // S16 (U=0, sx=0) -> SH @@ -1862,6 +1851,7 @@ def VNMLAH : AHbI<0b11100, 0b01, 1, 0, RegConstraint<"$Sdin = $Sd">, Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; +// (-(a * b) - dst) -> -(dst + (a * b)) def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; @@ -1869,6 +1859,14 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; +// (-dst - (a * b)) -> -(dst + (a * b)) +def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))), + (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, + Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; +def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)), + (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm", @@ -2160,28 +2158,32 @@ let Defs = [CPSR], Uses = [FPSCR_NZCV], Rt = 0b1111 /* apsr_nzcv */ in def FMSTAT : MovFromVFP<0b0001 /* fpscr */, (outs), (ins), "vmrs", "\tAPSR_nzcv, fpscr", [(arm_fmstat)]>; -// Application level FPSCR -> GPR -let hasSideEffects = 1, Uses = [FPSCR] in -def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPR:$Rt), (ins), - "vmrs", "\t$Rt, fpscr", - [(set GPR:$Rt, (int_arm_get_fpscr))]>; +let DecoderMethod = "DecodeForVMRSandVMSR" in { + // Application level FPSCR -> GPR + let hasSideEffects = 1, Uses = [FPSCR] in + def VMRS : MovFromVFP<0b0001 /* fpscr */, (outs GPRnopc:$Rt), (ins), + "vmrs", "\t$Rt, fpscr", + [(set GPRnopc:$Rt, 
(int_arm_get_fpscr))]>; -// System level FPEXC, FPSID -> GPR -let Uses = [FPSCR] in { - def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPR:$Rt), (ins), - "vmrs", "\t$Rt, fpexc", []>; - def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPR:$Rt), (ins), - "vmrs", "\t$Rt, fpsid", []>; - def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPR:$Rt), (ins), + // System level FPEXC, FPSID -> GPR + let Uses = [FPSCR] in { + def VMRS_FPEXC : MovFromVFP<0b1000 /* fpexc */, (outs GPRnopc:$Rt), (ins), + "vmrs", "\t$Rt, fpexc", []>; + def VMRS_FPSID : MovFromVFP<0b0000 /* fpsid */, (outs GPRnopc:$Rt), (ins), + "vmrs", "\t$Rt, fpsid", []>; + def VMRS_MVFR0 : MovFromVFP<0b0111 /* mvfr0 */, (outs GPRnopc:$Rt), (ins), "vmrs", "\t$Rt, mvfr0", []>; - def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPR:$Rt), (ins), - "vmrs", "\t$Rt, mvfr1", []>; - def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPR:$Rt), (ins), - "vmrs", "\t$Rt, mvfr2", []>, Requires<[HasFPARMv8]>; - def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPR:$Rt), (ins), - "vmrs", "\t$Rt, fpinst", []>; - def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPR:$Rt), (ins), - "vmrs", "\t$Rt, fpinst2", []>; + def VMRS_MVFR1 : MovFromVFP<0b0110 /* mvfr1 */, (outs GPRnopc:$Rt), (ins), + "vmrs", "\t$Rt, mvfr1", []>; + let Predicates = [HasFPARMv8] in { + def VMRS_MVFR2 : MovFromVFP<0b0101 /* mvfr2 */, (outs GPRnopc:$Rt), (ins), + "vmrs", "\t$Rt, mvfr2", []>; + } + def VMRS_FPINST : MovFromVFP<0b1001 /* fpinst */, (outs GPRnopc:$Rt), (ins), + "vmrs", "\t$Rt, fpinst", []>; + def VMRS_FPINST2 : MovFromVFP<0b1010 /* fpinst2 */, (outs GPRnopc:$Rt), + (ins), "vmrs", "\t$Rt, fpinst2", []>; + } } //===----------------------------------------------------------------------===// @@ -2205,21 +2207,23 @@ class MovToVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm, let Inst{4} = 1; } -let Defs = [FPSCR] in { - // Application level GPR -> FPSCR - def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPR:$src), - "vmsr", "\tfpscr, $src", [(int_arm_set_fpscr GPR:$src)]>; - // System level GPR -> FPEXC - def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPR:$src), - "vmsr", "\tfpexc, $src", []>; - // System level GPR -> FPSID - def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPR:$src), - "vmsr", "\tfpsid, $src", []>; - - def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPR:$src), +let DecoderMethod = "DecodeForVMRSandVMSR" in { + let Defs = [FPSCR] in { + // Application level GPR -> FPSCR + def VMSR : MovToVFP<0b0001 /* fpscr */, (outs), (ins GPRnopc:$src), + "vmsr", "\tfpscr, $src", + [(int_arm_set_fpscr GPRnopc:$src)]>; + // System level GPR -> FPEXC + def VMSR_FPEXC : MovToVFP<0b1000 /* fpexc */, (outs), (ins GPRnopc:$src), + "vmsr", "\tfpexc, $src", []>; + // System level GPR -> FPSID + def VMSR_FPSID : MovToVFP<0b0000 /* fpsid */, (outs), (ins GPRnopc:$src), + "vmsr", "\tfpsid, $src", []>; + def VMSR_FPINST : MovToVFP<0b1001 /* fpinst */, (outs), (ins GPRnopc:$src), "vmsr", "\tfpinst, $src", []>; - def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPR:$src), - "vmsr", "\tfpinst2, $src", []>; + def VMSR_FPINST2 : MovToVFP<0b1010 /* fpinst2 */, (outs), (ins GPRnopc:$src), + "vmsr", "\tfpinst2, $src", []>; + } } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstructionSelector.cpp b/lib/Target/ARM/ARMInstructionSelector.cpp index faed6b867e2bc..6bbeae2e11514 100644 --- 
a/lib/Target/ARM/ARMInstructionSelector.cpp +++ b/lib/Target/ARM/ARMInstructionSelector.cpp @@ -15,19 +15,15 @@ #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "arm-isel" -#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" - using namespace llvm; -#ifndef LLVM_BUILD_GLOBAL_ISEL -#error "You shouldn't build this" -#endif - namespace { #define GET_GLOBALISEL_PREDICATE_BITSET @@ -39,10 +35,11 @@ public: ARMInstructionSelector(const ARMBaseTargetMachine &TM, const ARMSubtarget &STI, const ARMRegisterBankInfo &RBI); - bool select(MachineInstr &I) const override; + bool select(MachineInstr &I, CodeGenCoverage &CoverageInfo) const override; + static const char *getName() { return DEBUG_TYPE; } private: - bool selectImpl(MachineInstr &I) const; + bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; struct CmpConstants; struct InsertInfo; @@ -60,7 +57,9 @@ private: // Set \p DestReg to \p Constant. void putConstant(InsertInfo I, unsigned DestReg, unsigned Constant) const; + bool selectGlobal(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const; bool selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const; + bool selectShift(unsigned ShiftOpc, MachineInstrBuilder &MIB) const; // Check if the types match and both operands have the expected size and // register bank. @@ -98,7 +97,7 @@ createARMInstructionSelector(const ARMBaseTargetMachine &TM, } } -unsigned zero_reg = 0; +const unsigned zero_reg = 0; #define GET_GLOBALISEL_IMPL #include "ARMGenGlobalISel.inc" @@ -488,6 +487,127 @@ bool ARMInstructionSelector::insertComparison(CmpConstants Helper, InsertInfo I, return true; } +bool ARMInstructionSelector::selectGlobal(MachineInstrBuilder &MIB, + MachineRegisterInfo &MRI) const { + if ((STI.isROPI() || STI.isRWPI()) && !STI.isTargetELF()) { + DEBUG(dbgs() << "ROPI and RWPI only supported for ELF\n"); + return false; + } + + auto GV = MIB->getOperand(1).getGlobal(); + if (GV->isThreadLocal()) { + DEBUG(dbgs() << "TLS variables not supported yet\n"); + return false; + } + + auto &MBB = *MIB->getParent(); + auto &MF = *MBB.getParent(); + + bool UseMovt = STI.useMovt(MF); + + unsigned Size = TM.getPointerSize(); + unsigned Alignment = 4; + + auto addOpsForConstantPoolLoad = [&MF, Alignment, + Size](MachineInstrBuilder &MIB, + const GlobalValue *GV, bool IsSBREL) { + assert(MIB->getOpcode() == ARM::LDRi12 && "Unsupported instruction"); + auto ConstPool = MF.getConstantPool(); + auto CPIndex = + // For SB relative entries we need a target-specific constant pool. + // Otherwise, just use a regular constant pool entry. + IsSBREL + ? ConstPool->getConstantPoolIndex( + ARMConstantPoolConstant::Create(GV, ARMCP::SBREL), Alignment) + : ConstPool->getConstantPoolIndex(GV, Alignment); + MIB.addConstantPoolIndex(CPIndex, /*Offset*/ 0, /*TargetFlags*/ 0) + .addMemOperand( + MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), + MachineMemOperand::MOLoad, Size, Alignment)) + .addImm(0) + .add(predOps(ARMCC::AL)); + }; + + if (TM.isPositionIndependent()) { + bool Indirect = STI.isGVIndirectSymbol(GV); + // FIXME: Taking advantage of MOVT for ELF is pretty involved, so we don't + // support it yet. See PR28229. + unsigned Opc = + UseMovt && !STI.isTargetELF() + ? (Indirect ? 
ARM::MOV_ga_pcrel_ldr : ARM::MOV_ga_pcrel) + : (Indirect ? ARM::LDRLIT_ga_pcrel_ldr : ARM::LDRLIT_ga_pcrel); + MIB->setDesc(TII.get(Opc)); + + int TargetFlags = ARMII::MO_NO_FLAG; + if (STI.isTargetDarwin()) + TargetFlags |= ARMII::MO_NONLAZY; + if (STI.isGVInGOT(GV)) + TargetFlags |= ARMII::MO_GOT; + MIB->getOperand(1).setTargetFlags(TargetFlags); + + if (Indirect) + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getGOT(MF), MachineMemOperand::MOLoad, + TM.getPointerSize(), Alignment)); + + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + } + + bool isReadOnly = STI.getTargetLowering()->isReadOnly(GV); + if (STI.isROPI() && isReadOnly) { + unsigned Opc = UseMovt ? ARM::MOV_ga_pcrel : ARM::LDRLIT_ga_pcrel; + MIB->setDesc(TII.get(Opc)); + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + } + if (STI.isRWPI() && !isReadOnly) { + auto Offset = MRI.createVirtualRegister(&ARM::GPRRegClass); + MachineInstrBuilder OffsetMIB; + if (UseMovt) { + OffsetMIB = BuildMI(MBB, *MIB, MIB->getDebugLoc(), + TII.get(ARM::MOVi32imm), Offset); + OffsetMIB.addGlobalAddress(GV, /*Offset*/ 0, ARMII::MO_SBREL); + } else { + // Load the offset from the constant pool. + OffsetMIB = + BuildMI(MBB, *MIB, MIB->getDebugLoc(), TII.get(ARM::LDRi12), Offset); + addOpsForConstantPoolLoad(OffsetMIB, GV, /*IsSBREL*/ true); + } + if (!constrainSelectedInstRegOperands(*OffsetMIB, TII, TRI, RBI)) + return false; + + // Add the offset to the SB register. + MIB->setDesc(TII.get(ARM::ADDrr)); + MIB->RemoveOperand(1); + MIB.addReg(ARM::R9) // FIXME: don't hardcode R9 + .addReg(Offset) + .add(predOps(ARMCC::AL)) + .add(condCodeOp()); + + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); + } + + if (STI.isTargetELF()) { + if (UseMovt) { + MIB->setDesc(TII.get(ARM::MOVi32imm)); + } else { + // Load the global's address from the constant pool. 
+ MIB->setDesc(TII.get(ARM::LDRi12)); + MIB->RemoveOperand(1); + addOpsForConstantPoolLoad(MIB, GV, /*IsSBREL*/ false); + } + } else if (STI.isTargetMachO()) { + if (UseMovt) + MIB->setDesc(TII.get(ARM::MOVi32imm)); + else + MIB->setDesc(TII.get(ARM::LDRLIT_ga_abs)); + } else { + DEBUG(dbgs() << "Object format not supported yet\n"); + return false; + } + + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, MachineRegisterInfo &MRI) const { auto &MBB = *MIB->getParent(); @@ -525,7 +645,16 @@ bool ARMInstructionSelector::selectSelect(MachineInstrBuilder &MIB, return true; } -bool ARMInstructionSelector::select(MachineInstr &I) const { +bool ARMInstructionSelector::selectShift(unsigned ShiftOpc, + MachineInstrBuilder &MIB) const { + MIB->setDesc(TII.get(ARM::MOVsr)); + MIB.addImm(ShiftOpc); + MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); + return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); +} + +bool ARMInstructionSelector::select(MachineInstr &I, + CodeGenCoverage &CoverageInfo) const { assert(I.getParent() && "Instruction should be in a basic block!"); assert(I.getParent()->getParent() && "Instruction should be in a function!"); @@ -540,7 +669,7 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { return true; } - if (selectImpl(I)) + if (selectImpl(I, CoverageInfo)) return true; MachineInstrBuilder MIB{MF, I}; @@ -633,12 +762,12 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { return selectCmp(Helper, MIB, MRI); } case G_FCMP: { - assert(TII.getSubtarget().hasVFP2() && "Can't select fcmp without VFP"); + assert(STI.hasVFP2() && "Can't select fcmp without VFP"); unsigned OpReg = I.getOperand(2).getReg(); unsigned Size = MRI.getType(OpReg).getSizeInBits(); - if (Size == 64 && TII.getSubtarget().isFPOnlySP()) { + if (Size == 64 && STI.isFPOnlySP()) { DEBUG(dbgs() << "Subtarget only supports single precision"); return false; } @@ -651,6 +780,13 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { ARM::FPRRegBankID, Size); return selectCmp(Helper, MIB, MRI); } + case G_LSHR: + return selectShift(ARM_AM::ShiftOpc::lsr, MIB); + case G_ASHR: + return selectShift(ARM_AM::ShiftOpc::asr, MIB); + case G_SHL: { + return selectShift(ARM_AM::ShiftOpc::lsl, MIB); + } case G_GEP: I.setDesc(TII.get(ARM::ADDrr)); MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); @@ -661,28 +797,8 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { I.setDesc(TII.get(ARM::ADDri)); MIB.addImm(0).add(predOps(ARMCC::AL)).add(condCodeOp()); break; - case G_CONSTANT: { - unsigned Reg = I.getOperand(0).getReg(); - - if (!validReg(MRI, Reg, 32, ARM::GPRRegBankID)) - return false; - - I.setDesc(TII.get(ARM::MOVi)); - MIB.add(predOps(ARMCC::AL)).add(condCodeOp()); - - auto &Val = I.getOperand(1); - if (Val.isCImm()) { - if (Val.getCImm()->getBitWidth() > 32) - return false; - Val.ChangeToImmediate(Val.getCImm()->getZExtValue()); - } - - if (!Val.isImm()) { - return false; - } - - break; - } + case G_GLOBAL_VALUE: + return selectGlobal(MIB, MRI); case G_STORE: case G_LOAD: { const auto &MemOp = **I.memoperands_begin(); @@ -697,7 +813,7 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { LLT ValTy = MRI.getType(Reg); const auto ValSize = ValTy.getSizeInBits(); - assert((ValSize != 64 || TII.getSubtarget().hasVFP2()) && + assert((ValSize != 64 || STI.hasVFP2()) && "Don't know how to load/store 64-bit value without VFP"); const auto NewOpc = selectLoadStoreOpCode(I.getOpcode(), RegBank, 
ValSize); @@ -739,7 +855,7 @@ bool ARMInstructionSelector::select(MachineInstr &I) const { // Branch conditionally. auto Branch = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(ARM::Bcc)) .add(I.getOperand(1)) - .add(predOps(ARMCC::EQ, ARM::CPSR)); + .add(predOps(ARMCC::NE, ARM::CPSR)); if (!constrainSelectedInstRegOperands(*Branch, TII, TRI, RBI)) return false; I.eraseFromParent(); diff --git a/lib/Target/ARM/ARMLegalizerInfo.cpp b/lib/Target/ARM/ARMLegalizerInfo.cpp index 1c17c07e4cb00..2dd1dff64e878 100644 --- a/lib/Target/ARM/ARMLegalizerInfo.cpp +++ b/lib/Target/ARM/ARMLegalizerInfo.cpp @@ -17,16 +17,60 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/LowLevelType.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" -#include "llvm/Target/TargetOpcodes.h" using namespace llvm; -#ifndef LLVM_BUILD_GLOBAL_ISEL -#error "You shouldn't build this" -#endif +/// FIXME: The following static functions are SizeChangeStrategy functions +/// that are meant to temporarily mimic the behaviour of the old legalization +/// based on doubling/halving non-legal types as closely as possible. This is +/// not entirly possible as only legalizing the types that are exactly a power +/// of 2 times the size of the legal types would require specifying all those +/// sizes explicitly. +/// In practice, not specifying those isn't a problem, and the below functions +/// should disappear quickly as we add support for legalizing non-power-of-2 +/// sized types further. +static void +addAndInterleaveWithUnsupported(LegalizerInfo::SizeAndActionsVec &result, + const LegalizerInfo::SizeAndActionsVec &v) { + for (unsigned i = 0; i < v.size(); ++i) { + result.push_back(v[i]); + if (i + 1 < v[i].first && i + 1 < v.size() && + v[i + 1].first != v[i].first + 1) + result.push_back({v[i].first + 1, LegalizerInfo::Unsupported}); + } +} + +static LegalizerInfo::SizeAndActionsVec +widen_8_16(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 17); + LegalizerInfo::SizeAndActionsVec result = { + {1, LegalizerInfo::Unsupported}, + {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}, + {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + return result; +} + +static LegalizerInfo::SizeAndActionsVec +widen_1_8_16(const LegalizerInfo::SizeAndActionsVec &v) { + assert(v.size() >= 1); + assert(v[0].first > 17); + LegalizerInfo::SizeAndActionsVec result = { + {1, LegalizerInfo::WidenScalar}, {2, LegalizerInfo::Unsupported}, + {8, LegalizerInfo::WidenScalar}, {9, LegalizerInfo::Unsupported}, + {16, LegalizerInfo::WidenScalar}, {17, LegalizerInfo::Unsupported}}; + addAndInterleaveWithUnsupported(result, v); + auto Largest = result.back().first; + result.push_back({Largest + 1, LegalizerInfo::Unsupported}); + return result; +} static bool AEABI(const ARMSubtarget &ST) { return ST.isTargetAEABI() || ST.isTargetGNUAEABI() || ST.isTargetMuslAEABI(); @@ -43,6 +87,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + setAction({G_GLOBAL_VALUE, p0}, Legal); setAction({G_FRAME_INDEX, p0}, Legal); for (unsigned Op : {G_LOAD, G_STORE}) { @@ -52,14 +97,15 @@ 
ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { } for (unsigned Op : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR}) { - for (auto Ty : {s1, s8, s16}) - setAction({Op, Ty}, WidenScalar); + if (Op != G_ADD) + setLegalizeScalarToDifferentSizeStrategy( + Op, 0, widenToLargerTypesUnsupportedOtherwise); setAction({Op, s32}, Legal); } for (unsigned Op : {G_SDIV, G_UDIV}) { - for (auto Ty : {s8, s16}) - setAction({Op, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(Op, 0, + widenToLargerTypesUnsupportedOtherwise); if (ST.hasDivideInARMMode()) setAction({Op, s32}, Legal); else @@ -67,8 +113,7 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { } for (unsigned Op : {G_SREM, G_UREM}) { - for (auto Ty : {s8, s16}) - setAction({Op, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(Op, 0, widen_8_16); if (ST.hasDivideInARMMode()) setAction({Op, s32}, Lower); else if (AEABI(ST)) @@ -77,12 +122,13 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({Op, s32}, Libcall); } - for (unsigned Op : {G_SEXT, G_ZEXT}) { + for (unsigned Op : {G_SEXT, G_ZEXT, G_ANYEXT}) { setAction({Op, s32}, Legal); - for (auto Ty : {s1, s8, s16}) - setAction({Op, 1, Ty}, Legal); } + for (unsigned Op : {G_ASHR, G_LSHR, G_SHL}) + setAction({Op, s32}, Legal); + setAction({G_GEP, p0}, Legal); setAction({G_GEP, 1, s32}, Legal); @@ -93,18 +139,18 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_BRCOND, s1}, Legal); setAction({G_CONSTANT, s32}, Legal); - for (auto Ty : {s1, s8, s16}) - setAction({G_CONSTANT, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(G_CONSTANT, 0, widen_1_8_16); setAction({G_ICMP, s1}, Legal); - for (auto Ty : {s8, s16}) - setAction({G_ICMP, 1, Ty}, WidenScalar); + setLegalizeScalarToDifferentSizeStrategy(G_ICMP, 1, + widenToLargerTypesUnsupportedOtherwise); for (auto Ty : {s32, p0}) setAction({G_ICMP, 1, Ty}, Legal); if (!ST.useSoftFloat() && ST.hasVFP2()) { - setAction({G_FADD, s32}, Legal); - setAction({G_FADD, s64}, Legal); + for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) + for (auto Ty : {s32, s64}) + setAction({BinOp, Ty}, Legal); setAction({G_LOAD, s64}, Legal); setAction({G_STORE, s64}, Legal); @@ -112,9 +158,15 @@ ARMLegalizerInfo::ARMLegalizerInfo(const ARMSubtarget &ST) { setAction({G_FCMP, s1}, Legal); setAction({G_FCMP, 1, s32}, Legal); setAction({G_FCMP, 1, s64}, Legal); + + setAction({G_MERGE_VALUES, s64}, Legal); + setAction({G_MERGE_VALUES, 1, s32}, Legal); + setAction({G_UNMERGE_VALUES, s32}, Legal); + setAction({G_UNMERGE_VALUES, 1, s64}, Legal); } else { - for (auto Ty : {s32, s64}) - setAction({G_FADD, Ty}, Libcall); + for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV}) + for (auto Ty : {s32, s64}) + setAction({BinOp, Ty}, Libcall); setAction({G_FCMP, s1}, Legal); setAction({G_FCMP, 1, s32}, Custom); @@ -262,7 +314,7 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, // Our divmod libcalls return a struct containing the quotient and the // remainder. We need to create a virtual register for it. 
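The setLegalizeScalarToDifferentSizeStrategy calls above replace the old per-type WidenScalar entries with a rule that widens any narrow scalar to the next explicitly specified size. Hand-tracing the widen_8_16 helper defined earlier for the case where the only specified size is 32 bits (the situation for G_SREM/G_UREM here), and writing the 32-bit action as Libcall purely for illustration, gives:

LegalizerInfo::SizeAndActionsVec In = {{32, LegalizerInfo::Libcall}};
LegalizerInfo::SizeAndActionsVec Out = widen_8_16(In);
// Out == {{1, Unsupported},  {8, WidenScalar},  {9, Unsupported},
//         {16, WidenScalar}, {17, Unsupported}, {32, Libcall},
//         {33, Unsupported}}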
- auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); Type *ArgTy = Type::getInt32Ty(Ctx); StructType *RetTy = StructType::get(Ctx, {ArgTy, ArgTy}, /* Packed */ true); auto RetVal = MRI.createGenericVirtualRegister( @@ -303,7 +355,7 @@ bool ARMLegalizerInfo::legalizeCustom(MachineInstr &MI, return true; } - auto &Ctx = MIRBuilder.getMF().getFunction()->getContext(); + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); assert((OpSize == 32 || OpSize == 64) && "Unsupported operand size"); auto *ArgTy = OpSize == 32 ? Type::getFloatTy(Ctx) : Type::getDoubleTy(Ctx); auto *RetTy = Type::getInt32Ty(Ctx); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 5d57b6803c08a..8b3a2e2237961 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1,4 +1,4 @@ -//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===// +//===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===// // // The LLVM Compiler Infrastructure // @@ -19,31 +19,53 @@ #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "ThumbRegisterInfo.h" +#include "MCTargetDesc/ARMBaseInfo.h" +#include "Utils/ARMBaseInfo.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Pass.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdlib> +#include <iterator> +#include <limits> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "arm-ldst-opt" @@ -72,11 +94,11 @@ AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden, #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass" namespace { + /// Post- register allocation pass the combine load / store instructions to /// form ldm / stm instructions. 
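Concretely, the rewrite this pass performs looks like the following; registers, offsets and flags are invented for illustration, and the real code threads kill/dead markers and predication through CreateLoadStoreMulti:

//   ldr r0, [r4]
//   ldr r1, [r4, #4]
// becomes
//   ldm r4, {r0, r1}
// which at the MI level is roughly:
BuildMI(MBB, InsertBefore, DL, TII->get(ARM::LDMIA))
    .addReg(ARM::R4)                     // base register
    .add(predOps(ARMCC::AL))             // predicate
    .addReg(ARM::R0, RegState::Define)   // register list (defs for a load)
    .addReg(ARM::R1, RegState::Define);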
struct ARMLoadStoreOpt : public MachineFunctionPass { static char ID; - ARMLoadStoreOpt() : MachineFunctionPass(ID) {} const MachineFunction *MF; const TargetInstrInfo *TII; @@ -91,6 +113,8 @@ namespace { bool RegClassInfoValid; bool isThumb1, isThumb2; + ARMLoadStoreOpt() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &Fn) override; MachineFunctionProperties getRequiredProperties() const override { @@ -107,25 +131,31 @@ namespace { MachineInstr *MI; int Offset; ///< Load/Store offset. unsigned Position; ///< Position as counted from end of basic block. + MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position) : MI(&MI), Offset(Offset), Position(Position) {} }; - typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; + using MemOpQueue = SmallVector<MemOpQueueEntry, 8>; /// A set of MachineInstrs that fulfill (nearly all) conditions to get /// merged into a LDM/STM. struct MergeCandidate { /// List of instructions ordered by load/store offset. SmallVector<MachineInstr*, 4> Instrs; + /// Index in Instrs of the instruction being latest in the schedule. unsigned LatestMIIdx; + /// Index in Instrs of the instruction being earliest in the schedule. unsigned EarliestMIIdx; + /// Index into the basic block where the merged instruction will be /// inserted. (See MemOpQueueEntry.Position) unsigned InsertPos; + /// Whether the instructions can be merged into a ldm/stm instruction. bool CanMergeToLSMulti; + /// Whether the instructions can be merged into a ldrd/strd instruction. bool CanMergeToLSDouble; }; @@ -161,8 +191,10 @@ namespace { bool MergeReturnIntoLDM(MachineBasicBlock &MBB); bool CombineMovBx(MachineBasicBlock &MBB); }; - char ARMLoadStoreOpt::ID = 0; -} + +} // end anonymous namespace + +char ARMLoadStoreOpt::ID = 0; INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false, false) @@ -482,7 +514,6 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, MO.setImm(Offset); else InsertSub = true; - } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) && !definesCPSR(*MBBI)) { // SUBS/ADDS using this register, with a dead def of the CPSR. @@ -502,12 +533,10 @@ void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB, } else { InsertSub = true; } - } else { // Can't update the instruction. InsertSub = true; } - } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) { // Since SUBS sets the condition flags, we can't place the base reset // after an instruction that has a live CPSR def. @@ -775,7 +804,6 @@ MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti( // Insert a sub instruction after the newly formed instruction to reset. if (!BaseKill) UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg); - } else { // No writeback, simply build the MachineInstr. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode)); @@ -853,7 +881,8 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) { } // Attempt the merge. - typedef MachineBasicBlock::iterator iterator; + using iterator = MachineBasicBlock::iterator; + MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx]; iterator InsertBefore = std::next(iterator(LatestMI)); MachineBasicBlock &MBB = *LatestMI->getParent(); @@ -970,7 +999,8 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { int Offset = MemOps[SIndex].Offset; const MachineOperand &PMO = getLoadStoreRegOp(*MI); unsigned PReg = PMO.getReg(); - unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg); + unsigned PRegNum = PMO.isUndef() ? 
std::numeric_limits<unsigned>::max() + : TRI->getEncodingValue(PReg); unsigned Latest = SIndex; unsigned Earliest = SIndex; unsigned Count = 1; @@ -1008,7 +1038,8 @@ void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) { break; // See if the current load/store may be part of a multi load/store. - unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg); + unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max() + : TRI->getEncodingValue(Reg); bool PartOfLSMulti = CanMergeToLSMulti; if (PartOfLSMulti) { // Register numbers must be in ascending order. @@ -1242,7 +1273,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) { // can still change to a writeback form as that will save us 2 bytes // of code size. It can create WAW hazards though, so only do it if // we're minimizing code size. - if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill) + if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill) return false; bool HighRegsUsed = false; @@ -1559,12 +1590,10 @@ static bool isMemoryOp(const MachineInstr &MI) { static void InsertLDR_STR(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, int Offset, - bool isDef, const DebugLoc &DL, unsigned NewOpc, - unsigned Reg, bool RegDeadKill, bool RegUndef, - unsigned BaseReg, bool BaseKill, bool BaseUndef, - bool OffKill, bool OffUndef, ARMCC::CondCodes Pred, - unsigned PredReg, const TargetInstrInfo *TII, - bool isT2) { + bool isDef, unsigned NewOpc, unsigned Reg, + bool RegDeadKill, bool RegUndef, unsigned BaseReg, + bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred, + unsigned PredReg, const TargetInstrInfo *TII) { if (isDef) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) @@ -1584,6 +1613,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI) { MachineInstr *MI = &*MBBI; unsigned Opcode = MI->getOpcode(); + // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns + // if we see this opcode. if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8) return false; @@ -1615,8 +1646,8 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool OddUndef = MI->getOperand(1).isUndef(); bool BaseKill = BaseOp.isKill(); bool BaseUndef = BaseOp.isUndef(); - bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); - bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef(); + assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) && + "register offset not handled below"); int OffImm = getMemoryOpOffset(*MI); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg); @@ -1654,40 +1685,29 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned NewOpc2 = (isLd) ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12) : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12); - DebugLoc dl = MBBI->getDebugLoc(); - // If this is a load and base register is killed, it may have been - // re-defed by the load, make sure the first load does not clobber it. - if (isLd && - (BaseKill || OffKill) && - (TRI->regsOverlap(EvenReg, BaseReg))) { + // If this is a load, make sure the first load does not clobber the base + // register before the second load reads it. 
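A worked instance of the guard that follows, with registers chosen for illustration only: r1/r2 is not a valid LDRD pair in ARM mode, so the instruction is split, and because the first destination aliases the base the word at offset +4 has to be loaded first.

// Illustrative expansion, not taken from the patch:
//   ldrd r1, r2, [r1, #8]
// becomes
//   ldr  r2, [r1, #12]   ; second word first, base still intact
//   ldr  r1, [r1, #8]    ; now the base may be overwritten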
+ if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) { assert(!TRI->regsOverlap(OddReg, BaseReg)); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, - OddReg, OddDeadKill, false, - BaseReg, false, BaseUndef, false, OffUndef, - Pred, PredReg, TII, isT2); - InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, - EvenReg, EvenDeadKill, false, - BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, - Pred, PredReg, TII, isT2); + InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill, + false, BaseReg, false, BaseUndef, Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill, + false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII); } else { if (OddReg == EvenReg && EvenDeadKill) { // If the two source operands are the same, the kill marker is // probably on the first one. e.g. - // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 + // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0 EvenDeadKill = false; OddDeadKill = true; } // Never kill the base register in the first instruction. if (EvenReg == BaseReg) EvenDeadKill = false; - InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, - EvenReg, EvenDeadKill, EvenUndef, - BaseReg, false, BaseUndef, false, OffUndef, - Pred, PredReg, TII, isT2); - InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2, - OddReg, OddDeadKill, OddUndef, - BaseReg, BaseKill, BaseUndef, OffKill, OffUndef, - Pred, PredReg, TII, isT2); + InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill, + EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill, + OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII); } if (isLd) ++NumLDRD2LDR; @@ -1796,7 +1816,6 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { MergeBaseCandidates.push_back(&*MBBI); } - // If we are here then the chain is broken; Extract candidates for a merge. if (MemOps.size() > 0) { FormCandidates(MemOps); @@ -1890,6 +1909,17 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { MO.setReg(ARM::PC); PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI); MBB.erase(MBBI); + // We now restore LR into PC so it is not live-out of the return block + // anymore: Clear the CSI Restored bit. + MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo(); + // CSI should be fixed after PrologEpilog Insertion + assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid"); + for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) { + if (Info.getReg() == ARM::LR) { + Info.setRestored(false); + break; + } + } return true; } } @@ -1923,7 +1953,7 @@ bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) { } bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; MF = &Fn; @@ -1956,11 +1986,11 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { "ARM pre- register allocation load / store optimization pass" namespace { + /// Pre- register allocation pass that move load / stores from consecutive /// locations close to make it more likely they will be combined later. 
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ static char ID; - ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} AliasAnalysis *AA; const DataLayout *TD; @@ -1970,13 +2000,15 @@ namespace { MachineRegisterInfo *MRI; MachineFunction *MF; + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {} + bool runOnMachineFunction(MachineFunction &Fn) override; StringRef getPassName() const override { return ARM_PREALLOC_LOAD_STORE_OPT_NAME; } - virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired<AAResultsWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -1994,14 +2026,16 @@ namespace { DenseMap<MachineInstr*, unsigned> &MI2LocMap); bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); }; - char ARMPreAllocLoadStoreOpt::ID = 0; -} + +} // end anonymous namespace + +char ARMPreAllocLoadStoreOpt::ID = 0; INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt", ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false) bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { - if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction())) + if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction())) return false; TD = &Fn.getDataLayout(); @@ -2096,9 +2130,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, return false; unsigned Align = (*Op0->memoperands_begin())->getAlignment(); - const Function *Func = MF->getFunction(); + const Function &Func = MF->getFunction(); unsigned ReqAlign = STI->hasV6Ops() - ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext())) + ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext())) : 8; // Pre-v6 need 8-byte align if (Align < ReqAlign) return false; @@ -2304,8 +2338,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { bool RetVal = false; DenseMap<MachineInstr*, unsigned> MI2LocMap; - DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; - DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; + DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap; + DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap; SmallVector<unsigned, 4> LdBases; SmallVector<unsigned, 4> StBases; @@ -2337,7 +2371,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { bool StopHere = false; if (isLd) { - DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = + DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI = Base2LdsMap.find(Base); if (BI != Base2LdsMap.end()) { for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { @@ -2353,7 +2387,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { LdBases.push_back(Base); } } else { - DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = + DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI = Base2StsMap.find(Base); if (BI != Base2StsMap.end()) { for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { @@ -2405,7 +2439,6 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { return RetVal; } - /// Returns an instance of the load / store optimization pass. 
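Both variants are reached through the factory below; the call sites live in the target's pass configuration rather than in this file. A sketch of the assumed wiring, for illustration only:

void ARMPassConfig::addPreRegAlloc() {
  addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/true));
}
void ARMPassConfig::addPreSched2() {
  addPass(createARMLoadStoreOptimizationPass(/*PreAlloc=*/false));
}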
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { if (PreAlloc) diff --git a/lib/Target/ARM/ARMMacroFusion.cpp b/lib/Target/ARM/ARMMacroFusion.cpp index 1b6e97c28d453..5c9aad417ceb8 100644 --- a/lib/Target/ARM/ARMMacroFusion.cpp +++ b/lib/Target/ARM/ARMMacroFusion.cpp @@ -15,7 +15,7 @@ #include "ARMMacroFusion.h" #include "ARMSubtarget.h" #include "llvm/CodeGen/MacroFusion.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" namespace llvm { @@ -31,7 +31,7 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, // Assume wildcards for unspecified instrs. unsigned FirstOpcode = FirstMI ? FirstMI->getOpcode() - : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END); + : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END); unsigned SecondOpcode = SecondMI.getOpcode(); if (ST.hasFuseAES()) diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp index 7e4d598a6e0be..cff4a256100d9 100644 --- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp +++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp @@ -49,7 +49,7 @@ static bool CanMovePastDMB(const MachineInstr *MI) { } bool ARMOptimizeBarriersPass::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(*MF.getFunction())) + if (skipFunction(MF.getFunction())) return false; // Vector to store the DMBs we will remove after the first iteration diff --git a/lib/Target/ARM/ARMRegisterBankInfo.cpp b/lib/Target/ARM/ARMRegisterBankInfo.cpp index 8449302358948..b32bfd449544f 100644 --- a/lib/Target/ARM/ARMRegisterBankInfo.cpp +++ b/lib/Target/ARM/ARMRegisterBankInfo.cpp @@ -17,17 +17,13 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #define GET_TARGET_REGBANK_IMPL #include "ARMGenRegisterBank.inc" using namespace llvm; -#ifndef LLVM_BUILD_GLOBAL_ISEL -#error "You shouldn't build this" -#endif - // FIXME: TableGen this. // If it grows too much and TableGen still isn't ready to do the job, extract it // into an ARMGenRegisterBankInfo.def (similar to AArch64). @@ -202,7 +198,7 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { // Try the default logic for non-generic instructions that are either copies // or already have some operands assigned to banks. - if (!isPreISelGenericOpcode(Opc)) { + if (!isPreISelGenericOpcode(Opc) || Opc == TargetOpcode::G_PHI) { const InstructionMapping &Mapping = getInstrMappingImpl(MI); if (Mapping.isValid()) return Mapping; @@ -222,6 +218,9 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case G_AND: case G_OR: case G_XOR: + case G_LSHR: + case G_ASHR: + case G_SHL: case G_SDIV: case G_UDIV: case G_SEXT: @@ -243,17 +242,19 @@ ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { : &ARM::ValueMappings[ARM::GPR3OpsIdx]; break; } - case G_FADD: { + case G_FADD: + case G_FSUB: + case G_FMUL: + case G_FDIV: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) && - "Unsupported size for G_FADD"); - OperandsMapping = Ty.getSizeInBits() == 64 + OperandsMapping =Ty.getSizeInBits() == 64 ? 
&ARM::ValueMappings[ARM::DPR3OpsIdx] : &ARM::ValueMappings[ARM::SPR3OpsIdx]; break; } case G_CONSTANT: case G_FRAME_INDEX: + case G_GLOBAL_VALUE: OperandsMapping = getOperandsMapping({&ARM::ValueMappings[ARM::GPR3OpsIdx], nullptr}); break; diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b10583bc7983c..14526b777c70a 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -7,6 +7,8 @@ // //===----------------------------------------------------------------------===// +include "ARMSystemRegister.td" + //===----------------------------------------------------------------------===// // Declarations that describe the ARM register file //===----------------------------------------------------------------------===// @@ -49,9 +51,19 @@ def ssub_0 : SubRegIndex<32>; def ssub_1 : SubRegIndex<32, 32>; def ssub_2 : ComposedSubRegIndex<dsub_1, ssub_0>; def ssub_3 : ComposedSubRegIndex<dsub_1, ssub_1>; +def ssub_4 : ComposedSubRegIndex<dsub_2, ssub_0>; +def ssub_5 : ComposedSubRegIndex<dsub_2, ssub_1>; +def ssub_6 : ComposedSubRegIndex<dsub_3, ssub_0>; +def ssub_7 : ComposedSubRegIndex<dsub_3, ssub_1>; +def ssub_8 : ComposedSubRegIndex<dsub_4, ssub_0>; +def ssub_9 : ComposedSubRegIndex<dsub_4, ssub_1>; +def ssub_10 : ComposedSubRegIndex<dsub_5, ssub_0>; +def ssub_11 : ComposedSubRegIndex<dsub_5, ssub_1>; +def ssub_12 : ComposedSubRegIndex<dsub_6, ssub_0>; +def ssub_13 : ComposedSubRegIndex<dsub_6, ssub_1>; -def gsub_0 : SubRegIndex<32>; -def gsub_1 : SubRegIndex<32, 32>; +def gsub_0 : SubRegIndex<32>; +def gsub_1 : SubRegIndex<32, 32>; // Let TableGen synthesize the remaining 12 ssub_* indices. // We don't need to name them. } @@ -201,6 +213,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12), let AltOrderSelect = [{ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only(); }]; + let DiagnosticString = "operand must be a register in range [r0, r15]"; } // GPRs without the PC. Some ARM instructions do not allow the PC in @@ -211,6 +224,7 @@ def GPRnopc : RegisterClass<"ARM", [i32], 32, (sub GPR, PC)> { let AltOrderSelect = [{ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only(); }]; + let DiagnosticString = "operand must be a register in range [r0, r14]"; } // GPRs without the PC but with APSR. Some instructions allow accessing the @@ -221,6 +235,7 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV) let AltOrderSelect = [{ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only(); }]; + let DiagnosticString = "operand must be a register in range [r0, r14] or apsr_nzcv"; } // GPRsp - Only the SP is legal. Used by Thumb1 instructions that want the @@ -228,7 +243,9 @@ def GPRwithAPSR : RegisterClass<"ARM", [i32], 32, (add (sub GPR, PC), APSR_NZCV) // FIXME: It would be better to not use this at all and refactor the // instructions to not have SP an an explicit argument. That makes // frame index resolution a bit trickier, though. -def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)>; +def GPRsp : RegisterClass<"ARM", [i32], 32, (add SP)> { + let DiagnosticString = "operand must be a register sp"; +} // restricted GPR register class. 
Many Thumb2 instructions allow the full // register range for operands, but have undefined behaviours when PC @@ -239,18 +256,23 @@ def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> { let AltOrderSelect = [{ return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only(); }]; + let DiagnosticType = "rGPR"; } // Thumb registers are R0-R7 normally. Some instructions can still use // the general GPR register class above (MOV, e.g.) -def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>; +def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)> { + let DiagnosticString = "operand must be a register in range [r0, r7]"; +} // Thumb registers R0-R7 and the PC. Some instructions like TBB or THH allow // the PC to be used as a destination operand as well. def tGPRwithpc : RegisterClass<"ARM", [i32], 32, (add tGPR, PC)>; // The high registers in thumb mode, R8-R15. -def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>; +def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)> { + let DiagnosticString = "operand must be a register in range [r8, r15]"; +} // For tail calls, we can't use callee-saved registers, as they are restored // to the saved value before the tail call, which would clobber a call address. @@ -282,11 +304,14 @@ def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)> { let AltOrderSelect = [{ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF); }]; + let DiagnosticString = "operand must be a register in range [s0, s31]"; } // Subset of SPR which can be used as a source of NEON scalars for 16-bit // operations -def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>; +def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)> { + let DiagnosticString = "operand must be a register in range [s0, s15]"; +} // Scalar double precision floating point / generic 64-bit vector register // class. @@ -301,17 +326,22 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 6 let AltOrderSelect = [{ return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF); }]; + let DiagnosticType = "DPR"; } // Subset of DPR that are accessible with VFP2 (and so that also have // 32-bit SPR subregs). def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, - (trunc DPR, 16)>; + (trunc DPR, 16)> { + let DiagnosticString = "operand must be a register in range [d0, d15]"; +} // Subset of DPR which can be used as a source of NEON scalars for 16-bit // operations def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32, v4f16], 64, - (trunc DPR, 8)>; + (trunc DPR, 8)> { + let DiagnosticString = "operand must be a register in range [d0, d7]"; +} // Generic 128-bit vector register class. def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128, @@ -319,15 +349,20 @@ def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16] // Allocate non-VFP2 aliases Q8-Q15 first. let AltOrders = [(rotl QPR, 8)]; let AltOrderSelect = [{ return 1; }]; + let DiagnosticString = "operand must be a register in range [q0, q15]"; } // Subset of QPR that have 32-bit SPR subregs. def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc QPR, 8)>; + 128, (trunc QPR, 8)> { + let DiagnosticString = "operand must be a register in range [q0, q7]"; +} // Subset of QPR that have DPR_8 and SPR_8 subregs. 
def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - 128, (trunc QPR, 4)>; + 128, (trunc QPR, 4)> { + let DiagnosticString = "operand must be a register in range [q0, q3]"; +} // Pseudo-registers representing odd-even pairs of D registers. The even-odd // pairs are already represented by the Q registers. diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td index 53e012f13ee24..ed5a3a7bb6966 100644 --- a/lib/Target/ARM/ARMSchedule.td +++ b/lib/Target/ARM/ARMSchedule.td @@ -414,6 +414,7 @@ def IIC_VTBX1 : InstrItinClass; def IIC_VTBX2 : InstrItinClass; def IIC_VTBX3 : InstrItinClass; def IIC_VTBX4 : InstrItinClass; +def IIC_VDOTPROD : InstrItinClass; //===----------------------------------------------------------------------===// // Processor instruction itineraries. diff --git a/lib/Target/ARM/ARMScheduleA57.td b/lib/Target/ARM/ARMScheduleA57.td index 525079d12d516..1ed9e14dfcd64 100644 --- a/lib/Target/ARM/ARMScheduleA57.td +++ b/lib/Target/ARM/ARMScheduleA57.td @@ -971,9 +971,9 @@ def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>; def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>; // ASIMD arith, basic -def : InstRW<[A57Write_3cyc_1V], (instregex "VADD", "VADDL", "VADDW", +def : InstRW<[A57Write_3cyc_1V], (instregex "VADDv", "VADDL", "VADDW", "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)", - "VPADDi", "VPADDL", "VSUB", "VSUBL", "VSUBW")>; + "VPADDi", "VPADDL", "VSUBv", "VSUBL", "VSUBW")>; // ASIMD arith, complex def : InstRW<[A57Write_3cyc_1V], (instregex "VABS", "VADDHN", "VHADD", "VHSUB", diff --git a/lib/Target/ARM/ARMScheduleR52.td b/lib/Target/ARM/ARMScheduleR52.td index 782be9b60a7ae..ca3172808d362 100644 --- a/lib/Target/ARM/ARMScheduleR52.td +++ b/lib/Target/ARM/ARMScheduleR52.td @@ -24,7 +24,6 @@ def CortexR52Model : SchedMachineModel { let IssueWidth = 2; // 2 micro-ops dispatched per cycle let LoadLatency = 1; // Optimistic, assuming no misses let MispredictPenalty = 8; // A branch direction mispredict, including PFU - let PostRAScheduler = 1; // Enable PostRA scheduler pass. let CompleteModel = 0; // Covers instructions applicable to cortex-r52. } diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 33dcf9b8fef02..d4fbf76f299f4 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -171,7 +171,7 @@ SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy( // Code size optimisation: do not inline memcpy if expansion results in // more instructions than the libary call. 
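This is one of many hunks in the patch that track MachineFunction::getFunction() changing from returning a pointer to returning a reference, so call sites drop the -> dereference and pass the reference straight to helpers such as skipFunction and optForMinSize. A minimal sketch of the updated idiom (MF stands for any MachineFunction; illustrative only):

const Function &F = MF.getFunction();   // now a reference, never null
bool MinSize = F.optForMinSize();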
- if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction()->optForMinSize()) { + if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) { return SDValue(); } diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 2c42a13361664..4d4a88126ce65 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -13,11 +13,9 @@ #include "ARM.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL #include "ARMCallLowering.h" #include "ARMLegalizerInfo.h" #include "ARMRegisterBankInfo.h" -#endif #include "ARMSubtarget.h" #include "ARMFrameLowering.h" #include "ARMInstrInfo.h" @@ -30,13 +28,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" -#ifdef LLVM_BUILD_GLOBAL_ISEL -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" -#include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" -#include "llvm/CodeGen/GlobalISel/Legalizer.h" -#include "llvm/CodeGen/GlobalISel/RegBankSelect.h" -#endif #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -46,8 +38,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetParser.h" #include "llvm/Target/TargetOptions.h" -#include <cassert> -#include <string> using namespace llvm; @@ -101,35 +91,6 @@ ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU, return new ARMFrameLowering(STI); } -#ifdef LLVM_BUILD_GLOBAL_ISEL -namespace { - -struct ARMGISelActualAccessor : public GISelAccessor { - std::unique_ptr<CallLowering> CallLoweringInfo; - std::unique_ptr<InstructionSelector> InstSelector; - std::unique_ptr<LegalizerInfo> Legalizer; - std::unique_ptr<RegisterBankInfo> RegBankInfo; - - const CallLowering *getCallLowering() const override { - return CallLoweringInfo.get(); - } - - const InstructionSelector *getInstructionSelector() const override { - return InstSelector.get(); - } - - const LegalizerInfo *getLegalizerInfo() const override { - return Legalizer.get(); - } - - const RegisterBankInfo *getRegBankInfo() const override { - return RegBankInfo.get(); - } -}; - -} // end anonymous namespace -#endif - ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle) @@ -144,47 +105,35 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), TLInfo(TM, *this) { - assert((isThumb() || hasARMOps()) && - "Target must either be thumb or support ARM operations!"); -#ifndef LLVM_BUILD_GLOBAL_ISEL - GISelAccessor *GISel = new GISelAccessor(); -#else - ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor(); - GISel->CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering())); - GISel->Legalizer.reset(new ARMLegalizerInfo(*this)); + CallLoweringInfo.reset(new ARMCallLowering(*getTargetLowering())); + Legalizer.reset(new ARMLegalizerInfo(*this)); auto *RBI = new ARMRegisterBankInfo(*getRegisterInfo()); // FIXME: At this point, we can't rely on Subtarget having RBI. // It's awkward to mix passing RBI and the Subtarget; should we pass // TII/TRI as well? 
- GISel->InstSelector.reset(createARMInstructionSelector( + InstSelector.reset(createARMInstructionSelector( *static_cast<const ARMBaseTargetMachine *>(&TM), *this, *RBI)); - GISel->RegBankInfo.reset(RBI); -#endif - setGISelAccessor(*GISel); + RegBankInfo.reset(RBI); } const CallLowering *ARMSubtarget::getCallLowering() const { - assert(GISel && "Access to GlobalISel APIs not set"); - return GISel->getCallLowering(); + return CallLoweringInfo.get(); } const InstructionSelector *ARMSubtarget::getInstructionSelector() const { - assert(GISel && "Access to GlobalISel APIs not set"); - return GISel->getInstructionSelector(); + return InstSelector.get(); } const LegalizerInfo *ARMSubtarget::getLegalizerInfo() const { - assert(GISel && "Access to GlobalISel APIs not set"); - return GISel->getLegalizerInfo(); + return Legalizer.get(); } const RegisterBankInfo *ARMSubtarget::getRegBankInfo() const { - assert(GISel && "Access to GlobalISel APIs not set"); - return GISel->getRegBankInfo(); + return RegBankInfo.get(); } bool ARMSubtarget::isXRaySupported() const { @@ -196,7 +145,9 @@ void ARMSubtarget::initializeEnvironment() { // MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this // directly from it, but we can try to make sure they're consistent when both // available. - UseSjLjEH = isTargetDarwin() && !isTargetWatchABI(); + UseSjLjEH = (isTargetDarwin() && !isTargetWatchABI() && + Options.ExceptionModel == ExceptionHandling::None) || + Options.ExceptionModel == ExceptionHandling::SjLj; assert((!TM.getMCAsmInfo() || (TM.getMCAsmInfo()->getExceptionHandlingType() == ExceptionHandling::SjLj) == UseSjLjEH) && @@ -209,11 +160,11 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { if (isTargetDarwin()) { StringRef ArchName = TargetTriple.getArchName(); - unsigned ArchKind = ARM::parseArch(ArchName); - if (ArchKind == ARM::AK_ARMV7S) + ARM::ArchKind AK = ARM::parseArch(ArchName); + if (AK == ARM::ArchKind::ARMV7S) // Default to the Swift CPU when targeting armv7s/thumbv7s. CPUString = "swift"; - else if (ArchKind == ARM::AK_ARMV7K) + else if (AK == ARM::ArchKind::ARMV7K) // Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k. // ARMv7k does not use SjLj exception handling. CPUString = "cortex-a7"; @@ -265,8 +216,8 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { // baseline, since the LDM/POP instruction on Thumb doesn't take LR. This // means if we need to reload LR, it takes extra instructions, which outweighs // the value of the tail call; but here we don't know yet whether LR is going - // to be used. We generate the tail call here and turn it back into CALL/RET - // in emitEpilogue if LR is used. + // to be used. We take the optimistic approach of generating the tail call and + // perhaps taking a hit if we need to restore the LR. 
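The UseSjLjEH computation reworked in initializeEnvironment() above now honours an explicit exception model from TargetOptions instead of keying on the OS alone. A behaviour sketch using the names from that hunk, assuming ExceptionModel defaults to ExceptionHandling::None:

TargetOptions Opts;
// 1) Model left at None: Darwin (non-watchOS) keeps its SjLj default.
// 2) Model set explicitly to SjLj: SjLj is used on any ARM target.
Opts.ExceptionModel = ExceptionHandling::SjLj;
// 3) Any other explicit model (e.g. DwarfCFI) turns SjLj off, even on Darwin.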
// Thumb1 PIC calls to external symbols use BX, so they can be tail calls, // but we need to make sure there are enough registers; the only valid @@ -325,16 +276,18 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { case CortexA32: case CortexA35: case CortexA53: + case CortexA55: case CortexA57: case CortexA72: case CortexA73: + case CortexA75: case CortexR4: case CortexR4F: case CortexR5: case CortexR7: case CortexM3: - case ExynosM1: case CortexR52: + case ExynosM1: case Kryo: break; case Krait: @@ -386,6 +339,11 @@ bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { return false; } +bool ARMSubtarget::isGVInGOT(const GlobalValue *GV) const { + return isTargetELF() && TM.isPositionIndependent() && + !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); +} + unsigned ARMSubtarget::getMispredictionPenalty() const { return SchedModel.MispredictPenalty; } @@ -396,19 +354,17 @@ bool ARMSubtarget::hasSinCos() const { } bool ARMSubtarget::enableMachineScheduler() const { - // Enable the MachineScheduler before register allocation for out-of-order - // architectures where we do not use the PostRA scheduler anymore (for now - // restricted to swift). - return getSchedModel().isOutOfOrder() && isSwift(); + // Enable the MachineScheduler before register allocation for subtargets + // with the use-misched feature. + return useMachineScheduler(); } // This overrides the PostRAScheduler bit in the SchedModel for any CPU. bool ARMSubtarget::enablePostRAScheduler() const { - // No need for PostRA scheduling on out of order CPUs (for now restricted to - // swift). - if (getSchedModel().isOutOfOrder() && isSwift()) + if (disablePostRAScheduler()) return false; - return (!isThumb() || hasThumb2()); + // Don't reschedule potential IT blocks. + return !isThumb1Only(); } bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); } @@ -417,7 +373,7 @@ bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const { // For general targets, the prologue can grow when VFPs are allocated with // stride 4 (more vpush instructions). But WatchOS uses a compact unwind // format which it's more important to get right. - return isTargetWatchABI() || (isSwift() && !MF.getFunction()->optForMinSize()); + return isTargetWatchABI() || (isSwift() && !MF.getFunction().optForMinSize()); } bool ARMSubtarget::useMovt(const MachineFunction &MF) const { @@ -425,7 +381,7 @@ bool ARMSubtarget::useMovt(const MachineFunction &MF) const { // immediates as it is inherently position independent, and may be out of // range otherwise. 
return !NoMovt && hasV8MBaselineOps() && - (isTargetWindows() || !MF.getFunction()->optForMinSize() || genExecuteOnly()); + (isTargetWindows() || !MF.getFunction().optForMinSize() || genExecuteOnly()); } bool ARMSubtarget::useFastISel() const { diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 9d749537dc3b8..9301197e13877 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -16,16 +16,20 @@ #include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" +#include "ARMConstantPoolValue.h" #include "ARMFrameLowering.h" #include "ARMISelLowering.h" #include "ARMSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" -#include "llvm/CodeGen/GlobalISel/GISelAccessor.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/InstructionSelector.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/MC/MCSchedule.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <memory> #include <string> @@ -50,10 +54,12 @@ protected: CortexA35, CortexA5, CortexA53, + CortexA55, CortexA57, CortexA7, CortexA72, CortexA73, + CortexA75, CortexA8, CortexA9, CortexM3, @@ -98,6 +104,7 @@ protected: ARMv7ve, ARMv81a, ARMv82a, + ARMv83a, ARMv8a, ARMv8mBaseline, ARMv8mMainline, @@ -143,6 +150,7 @@ protected: bool HasV8Ops = false; bool HasV8_1aOps = false; bool HasV8_2aOps = false; + bool HasV8_3aOps = false; bool HasV8MBaselineOps = false; bool HasV8MMainlineOps = false; @@ -154,6 +162,9 @@ protected: bool HasFPARMv8 = false; bool HasNEON = false; + /// HasDotProd - True if the ARMv8.2A dot product instructions are supported. + bool HasDotProd = false; + /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to /// determine if NEON should actually be used. @@ -180,6 +191,13 @@ protected: /// UseSoftFloat - True if we're using software floating point features. bool UseSoftFloat = false; + /// UseMISched - True if MachineScheduler should be used for this subtarget. + bool UseMISched = false; + + /// DisablePostRAScheduler - False if scheduling should happen again after + /// register allocation. + bool DisablePostRAScheduler = false; + /// HasThumb2 - True if Thumb2 instructions are supported. bool HasThumb2 = false; @@ -328,6 +346,9 @@ protected: /// If true, VFP/NEON VMLA/VMLS have special RAW hazards. bool HasVMLxHazards = false; + // If true, read thread pointer from coprocessor register. + bool ReadTPHard = false; + /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON. bool UseNEONForFPMovs = false; @@ -413,9 +434,6 @@ public: ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle); - /// This object will take onwership of \p GISelAccessor. - void setGISelAccessor(GISelAccessor &GISel) { this->GISel.reset(&GISel); } - /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size /// that still makes it profitable to inline the call. unsigned getMaxInlineSizeThreshold() const { @@ -463,10 +481,11 @@ private: std::unique_ptr<ARMBaseInstrInfo> InstrInfo; ARMTargetLowering TLInfo; - /// Gather the accessor points to GlobalISel-related APIs. 
- /// This is used to avoid ifndefs spreading around while GISel is - /// an optional library. - std::unique_ptr<GISelAccessor> GISel; + /// GlobalISel related APIs. + std::unique_ptr<CallLowering> CallLoweringInfo; + std::unique_ptr<InstructionSelector> InstSelector; + std::unique_ptr<LegalizerInfo> Legalizer; + std::unique_ptr<RegisterBankInfo> RegBankInfo; void initializeEnvironment(); void initSubtargetFeatures(StringRef CPU, StringRef FS); @@ -486,6 +505,7 @@ public: bool hasV8Ops() const { return HasV8Ops; } bool hasV8_1aOps() const { return HasV8_1aOps; } bool hasV8_2aOps() const { return HasV8_2aOps; } + bool hasV8_3aOps() const { return HasV8_3aOps; } bool hasV8MBaselineOps() const { return HasV8MBaselineOps; } bool hasV8MMainlineOps() const { return HasV8MMainlineOps; } @@ -512,6 +532,7 @@ public: bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } bool hasCrypto() const { return HasCrypto; } + bool hasDotProd() const { return HasDotProd; } bool hasCRC() const { return HasCRC; } bool hasRAS() const { return HasRAS; } bool hasVirtualization() const { return HasVirtualization; } @@ -645,6 +666,8 @@ public: bool isROPI() const; bool isRWPI() const; + bool useMachineScheduler() const { return UseMISched; } + bool disablePostRAScheduler() const { return DisablePostRAScheduler; } bool useSoftFloat() const { return UseSoftFloat; } bool isThumb() const { return InThumbMode; } bool isThumb1Only() const { return InThumbMode && !HasThumb2; } @@ -653,6 +676,7 @@ public: bool isMClass() const { return ARMProcClass == MClass; } bool isRClass() const { return ARMProcClass == RClass; } bool isAClass() const { return ARMProcClass == AClass; } + bool isReadTPHard() const { return ReadTPHard; } bool isR9Reserved() const { return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9; @@ -727,6 +751,9 @@ public: /// True if the GV will be accessed via an indirect symbol. bool isGVIndirectSymbol(const GlobalValue *GV) const; + /// Returns the constant pool modifier needed to access the GV. + bool isGVInGOT(const GlobalValue *GV) const; + /// True if fast-isel is used. bool useFastISel() const; @@ -740,6 +767,13 @@ public: return ARM::BX_RET; return ARM::MOVPCLR; } + + /// Allow movt+movw for PIC global address calculation. + /// ELF does not have GOT relocations for movt+movw. + /// ROPI does not use GOT. + bool allowPositionIndependentMovt() const { + return isROPI() || !isTargetELF(); + } }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMSystemRegister.td b/lib/Target/ARM/ARMSystemRegister.td new file mode 100644 index 0000000000000..ad1d37168e087 --- /dev/null +++ b/lib/Target/ARM/ARMSystemRegister.td @@ -0,0 +1,156 @@ +//===-- ARMSystemRegister.td - ARM Register defs -------------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +include "llvm/TableGen/SearchableTable.td" + +//===----------------------------------------------------------------------===// +// Declarations that describe the ARM system-registers +//===----------------------------------------------------------------------===// + +// M-Class System Registers. +// 'Mask' bits create unique keys for searches. 
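These records feed TableGen's SearchableTable backend, which emits lookup tables keyed on the fields listed in SearchableFields. The namespace and function names in the sketch below are assumptions for illustration; they are not part of this patch:

// Hypothetical consumer of the generated table (names assumed):
if (const auto *Reg = ARMSysReg::lookupMClassSysRegByName("basepri_max")) {
  unsigned Enc12 = Reg->Encoding;   // 0x812, per the defs that follow
  (void)Enc12;
}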
+// +class MClassSysReg<bits<1> UniqMask1, + bits<1> UniqMask2, + bits<1> UniqMask3, + bits<12> Enc12, + string name> : SearchableTable { + let SearchableFields = ["Name", "M1Encoding12", "M2M3Encoding8", "Encoding"]; + string Name; + bits<13> M1Encoding12; + bits<10> M2M3Encoding8; + bits<12> Encoding; + + let Name = name; + let EnumValueField = "M1Encoding12"; + let EnumValueField = "M2M3Encoding8"; + let EnumValueField = "Encoding"; + + let M1Encoding12{12} = UniqMask1; + let M1Encoding12{11-00} = Enc12; + let Encoding = Enc12; + + let M2M3Encoding8{9} = UniqMask2; + let M2M3Encoding8{8} = UniqMask3; + let M2M3Encoding8{7-0} = Enc12{7-0}; + code Requires = [{ {} }]; +} + +// [|i|e|x]apsr_nzcvq has alias [|i|e|x]apsr. +// Mask1 Mask2 Mask3 Enc12, Name +let Requires = [{ {ARM::FeatureDSP} }] in { +def : MClassSysReg<0, 0, 0, 0x400, "apsr_g">; +def : MClassSysReg<0, 1, 1, 0xc00, "apsr_nzcvqg">; +def : MClassSysReg<0, 0, 0, 0x401, "iapsr_g">; +def : MClassSysReg<0, 1, 1, 0xc01, "iapsr_nzcvqg">; +def : MClassSysReg<0, 0, 0, 0x402, "eapsr_g">; +def : MClassSysReg<0, 1, 1, 0xc02, "eapsr_nzcvqg">; +def : MClassSysReg<0, 0, 0, 0x403, "xpsr_g">; +def : MClassSysReg<0, 1, 1, 0xc03, "xpsr_nzcvqg">; +} + +def : MClassSysReg<0, 0, 1, 0x800, "apsr">; +def : MClassSysReg<1, 1, 0, 0x800, "apsr_nzcvq">; +def : MClassSysReg<0, 0, 1, 0x801, "iapsr">; +def : MClassSysReg<1, 1, 0, 0x801, "iapsr_nzcvq">; +def : MClassSysReg<0, 0, 1, 0x802, "eapsr">; +def : MClassSysReg<1, 1, 0, 0x802, "eapsr_nzcvq">; +def : MClassSysReg<0, 0, 1, 0x803, "xpsr">; +def : MClassSysReg<1, 1, 0, 0x803, "xpsr_nzcvq">; + +def : MClassSysReg<0, 0, 1, 0x805, "ipsr">; +def : MClassSysReg<0, 0, 1, 0x806, "epsr">; +def : MClassSysReg<0, 0, 1, 0x807, "iepsr">; +def : MClassSysReg<0, 0, 1, 0x808, "msp">; +def : MClassSysReg<0, 0, 1, 0x809, "psp">; + +let Requires = [{ {ARM::HasV8MBaselineOps} }] in { +def : MClassSysReg<0, 0, 1, 0x80a, "msplim">; +def : MClassSysReg<0, 0, 1, 0x80b, "psplim">; +} + +def : MClassSysReg<0, 0, 1, 0x810, "primask">; + +let Requires = [{ {ARM::HasV7Ops} }] in { +def : MClassSysReg<0, 0, 1, 0x811, "basepri">; +def : MClassSysReg<0, 0, 1, 0x812, "basepri_max">; +def : MClassSysReg<0, 0, 1, 0x813, "faultmask">; +} + +def : MClassSysReg<0, 0, 1, 0x814, "control">; + +let Requires = [{ {ARM::Feature8MSecExt} }] in { +def : MClassSysReg<0, 0, 1, 0x888, "msp_ns">; +def : MClassSysReg<0, 0, 1, 0x889, "psp_ns">; +} + +let Requires = [{ {ARM::Feature8MSecExt, ARM::HasV8MBaselineOps} }] in { +def : MClassSysReg<0, 0, 1, 0x88a, "msplim_ns">; +def : MClassSysReg<0, 0, 1, 0x88b, "psplim_ns">; +} + +def : MClassSysReg<0, 0, 1, 0x890, "primask_ns">; + +let Requires = [{ {ARM::Feature8MSecExt, ARM::HasV7Ops} }] in { +def : MClassSysReg<0, 0, 1, 0x891, "basepri_ns">; +def : MClassSysReg<0, 0, 1, 0x893, "faultmask_ns">; +} + +let Requires = [{ {ARM::Feature8MSecExt} }] in { +def : MClassSysReg<0, 0, 1, 0x894, "control_ns">; +def : MClassSysReg<0, 0, 1, 0x898, "sp_ns">; +} + + +// Banked Registers +// +class BankedReg<string name, bits<8> enc> + : SearchableTable { + string Name; + bits<8> Encoding; + let Name = name; + let Encoding = enc; + let SearchableFields = ["Name", "Encoding"]; +} + +// The values here come from B9.2.3 of the ARM ARM, where bits 4-0 are SysM +// and bit 5 is R. 
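Each banked-register encoding therefore packs a single R bit above a 5-bit SysM selector; every spsr_* def below has bit 5 set, while the banked GPR/SP/LR entries keep it clear. A quick decode using spsr_fiq (0x2e in the defs that follow) as the example:

unsigned Enc  = 0x2e;               // spsr_fiq
unsigned R    = (Enc >> 5) & 0x1;   // 1 -> an SPSR_<mode> register
unsigned SysM = Enc & 0x1f;         // 0b01110, the FIQ bank (lr_fiq is 0x0e)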
+def : BankedReg<"r8_usr", 0x00>; +def : BankedReg<"r9_usr", 0x01>; +def : BankedReg<"r10_usr", 0x02>; +def : BankedReg<"r11_usr", 0x03>; +def : BankedReg<"r12_usr", 0x04>; +def : BankedReg<"sp_usr", 0x05>; +def : BankedReg<"lr_usr", 0x06>; +def : BankedReg<"r8_fiq", 0x08>; +def : BankedReg<"r9_fiq", 0x09>; +def : BankedReg<"r10_fiq", 0x0a>; +def : BankedReg<"r11_fiq", 0x0b>; +def : BankedReg<"r12_fiq", 0x0c>; +def : BankedReg<"sp_fiq", 0x0d>; +def : BankedReg<"lr_fiq", 0x0e>; +def : BankedReg<"lr_irq", 0x10>; +def : BankedReg<"sp_irq", 0x11>; +def : BankedReg<"lr_svc", 0x12>; +def : BankedReg<"sp_svc", 0x13>; +def : BankedReg<"lr_abt", 0x14>; +def : BankedReg<"sp_abt", 0x15>; +def : BankedReg<"lr_und", 0x16>; +def : BankedReg<"sp_und", 0x17>; +def : BankedReg<"lr_mon", 0x1c>; +def : BankedReg<"sp_mon", 0x1d>; +def : BankedReg<"elr_hyp", 0x1e>; +def : BankedReg<"sp_hyp", 0x1f>; +def : BankedReg<"spsr_fiq", 0x2e>; +def : BankedReg<"spsr_irq", 0x30>; +def : BankedReg<"spsr_svc", 0x32>; +def : BankedReg<"spsr_abt", 0x34>; +def : BankedReg<"spsr_und", 0x36>; +def : BankedReg<"spsr_mon", 0x3c>; +def : BankedReg<"spsr_hyp", 0x3e>; diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index c323a1d368dee..51982b2dab14c 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -10,10 +10,10 @@ // //===----------------------------------------------------------------------===// +#include "ARMTargetMachine.h" #include "ARM.h" -#include "ARMSubtarget.h" #include "ARMMacroFusion.h" -#include "ARMTargetMachine.h" +#include "ARMSubtarget.h" #include "ARMTargetObjectFile.h" #include "ARMTargetTransformInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" @@ -34,6 +34,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/DataLayout.h" @@ -44,7 +45,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" #include <cassert> @@ -91,6 +91,7 @@ extern "C" void LLVMInitializeARMTarget() { initializeARMPreAllocLoadStoreOptPass(Registry); initializeARMConstantIslandsPass(Registry); initializeARMExecutionDepsFixPass(Registry); + initializeARMExpandPseudoPass(Registry); } static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { @@ -190,17 +191,23 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT, return *RM; } +static CodeModel::Model getEffectiveCodeModel(Optional<CodeModel::Model> CM) { + if (CM) + return *CM; + return CodeModel::Small; +} + /// Create an ARM architecture model. 
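Since the code model is now threaded through as an Optional, the file-local helper above supplies the default when the caller does not specify one. A small behaviour trace, sketch only:

Optional<CodeModel::Model> CM;                          // caller did not pick a model
assert(getEffectiveCodeModel(CM) == CodeModel::Small);  // default
CM = CodeModel::Large;
assert(getEffectiveCodeModel(CM) == CodeModel::Large);  // explicit choice wins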
/// ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional<Reloc::Model> RM, - CodeModel::Model CM, + Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool isLittle) : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options, isLittle), TT, - CPU, FS, Options, getEffectiveRelocModel(TT, RM), CM, - OL), + CPU, FS, Options, getEffectiveRelocModel(TT, RM), + getEffectiveCodeModel(CM), OL), TargetABI(computeTargetABI(TT, CPU, Options)), TLOF(createTLOF(getTargetTriple())), isLittle(isLittle) { @@ -221,10 +228,10 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, Options.EABIVersion == EABI::Unknown) { // musl is compatible with glibc with regard to EABI version if ((TargetTriple.getEnvironment() == Triple::GNUEABI || - TargetTriple.getEnvironment() == Triple::GNUEABIHF || - TargetTriple.getEnvironment() == Triple::MuslEABI || - TargetTriple.getEnvironment() == Triple::MuslEABIHF) && - !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin())) + TargetTriple.getEnvironment() == Triple::GNUEABIHF || + TargetTriple.getEnvironment() == Triple::MuslEABI || + TargetTriple.getEnvironment() == Triple::MuslEABIHF) && + !(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin())) this->Options.EABIVersion = EABI::GNU; else this->Options.EABIVersion = EABI::EABI5; @@ -266,7 +273,12 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const { // function that reside in TargetOptions. resetTargetOptions(F); I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle); + + if (!I->isThumb() && !I->hasARMOps()) + F.getContext().emitError("Function '" + F.getName() + "' uses ARM " + "instructions, but the target does not support ARM mode execution."); } + return I.get(); } @@ -276,21 +288,20 @@ TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { }); } - ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional<Reloc::Model> RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) + Optional<CodeModel::Model> CM, + CodeGenOpt::Level OL, bool JIT) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Optional<Reloc::Model> RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) + Optional<CodeModel::Model> CM, + CodeGenOpt::Level OL, bool JIT) : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { @@ -299,7 +310,14 @@ namespace { class ARMPassConfig : public TargetPassConfig { public: ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) {} + : TargetPassConfig(TM, PM) { + if (TM.getOptLevel() != CodeGenOpt::None) { + ARMGenSubtargetInfo STI(TM.getTargetTriple(), TM.getTargetCPU(), + TM.getTargetFeatureString()); + if (STI.hasFeature(ARM::FeatureUseMISched)) + substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); + } + } ARMBaseTargetMachine &getARMTargetMachine() const { return getTM<ARMBaseTargetMachine>(); @@ -328,12 +346,10 @@ public: void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; -#ifdef LLVM_BUILD_GLOBAL_ISEL bool addIRTranslator() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() override; bool addGlobalInstructionSelect() override; -#endif void addPreRegAlloc() override; void addPreSched2() 
override; void addPreEmitPass() override; @@ -368,10 +384,11 @@ void ARMPassConfig::addIRPasses() { // determine whether it succeeded. We can exploit existing control-flow in // ldrex/strex loops to simplify this, but it needs tidying up. if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass(-1, [this](const Function &F) { - const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); - return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); - })); + addPass(createCFGSimplificationPass( + 1, false, false, true, true, [this](const Function &F) { + const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F); + return ST.hasAnyDataBarrier() && !ST.isThumb1Only(); + })); TargetPassConfig::addIRPasses(); @@ -408,7 +425,6 @@ bool ARMPassConfig::addInstSelector() { return false; } -#ifdef LLVM_BUILD_GLOBAL_ISEL bool ARMPassConfig::addIRTranslator() { addPass(new IRTranslator()); return false; @@ -428,7 +444,6 @@ bool ARMPassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); return false; } -#endif void ARMPassConfig::addPreRegAlloc() { if (getOptLevel() != CodeGenOpt::None) { diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 22ce949367f34..655ec3202bfbd 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -42,13 +42,14 @@ protected: public: ARMBaseTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool isLittle); ~ARMBaseTargetMachine() override; const ARMSubtarget *getSubtargetImpl(const Function &F) const override; - // The no argument getSubtargetImpl, while it exists on some targets, is - // deprecated and should not be used. + // DO NOT IMPLEMENT: There is no such thing as a valid default subtarget, + // subtargets are per-function entities based on the target-specific + // attributes of each function. const ARMSubtarget *getSubtargetImpl() const = delete; bool isLittleEndian() const { return isLittle; } @@ -61,10 +62,6 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } - - bool isMachineVerifierClean() const override { - return false; - } }; /// ARM/Thumb little endian target machine. @@ -73,8 +70,8 @@ class ARMLETargetMachine : public ARMBaseTargetMachine { public: ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL); + Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM, + CodeGenOpt::Level OL, bool JIT); }; /// ARM/Thumb big endian target machine. 
@@ -83,8 +80,8 @@ class ARMBETargetMachine : public ARMBaseTargetMachine { public: ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Optional<Reloc::Model> RM, CodeModel::Model CM, - CodeGenOpt::Level OL); + Optional<Reloc::Model> RM, Optional<CodeModel::Model> CM, + CodeGenOpt::Level OL, bool JIT); }; } // end namespace llvm diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 51b0fedd2b54f..cae01e415eff1 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1,4 +1,4 @@ -//===-- ARMTargetTransformInfo.cpp - ARM specific TTI ---------------------===// +//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===// // // The LLVM Compiler Infrastructure // @@ -8,9 +8,30 @@ //===----------------------------------------------------------------------===// #include "ARMTargetTransformInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/CostTable.h" -#include "llvm/Target/TargetLowering.h" +#include "ARMSubtarget.h" +#include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/CodeGen/CostTable.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/Casting.h" +#include "llvm/Target/TargetMachine.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "armtti" @@ -65,7 +86,6 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { return 3; } - // Constants smaller than 256 fit in the immediate field of // Thumb1 instructions so we return a zero cost and 1 otherwise. int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, @@ -109,7 +129,6 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, return getIntImmCost(Imm, Ty); } - int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -331,7 +350,6 @@ int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I) { - int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { @@ -455,7 +473,6 @@ int ARMTTIImpl::getArithmeticInstrCost( TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) { - int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); @@ -562,3 +579,67 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); } + +void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP) { + // Only currently enable these preferences for M-Class cores. 
+ if (!ST->isMClass()) + return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP); + + // Disable loop unrolling for Oz and Os. + UP.OptSizeThreshold = 0; + UP.PartialOptSizeThreshold = 0; + if (L->getHeader()->getParent()->optForSize()) + return; + + // Only enable on Thumb-2 targets. + if (!ST->isThumb2()) + return; + + SmallVector<BasicBlock*, 4> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + DEBUG(dbgs() << "Loop has:\n" + << "Blocks: " << L->getNumBlocks() << "\n" + << "Exit blocks: " << ExitingBlocks.size() << "\n"); + + // Only allow another exit other than the latch. This acts as an early exit + // as it mirrors the profitability calculation of the runtime unroller. + if (ExitingBlocks.size() > 2) + return; + + // Limit the CFG of the loop body for targets with a branch predictor. + // Allowing 4 blocks permits if-then-else diamonds in the body. + if (ST->hasBranchPredictor() && L->getNumBlocks() > 4) + return; + + // Scan the loop: don't unroll loops with calls as this could prevent + // inlining. + unsigned Cost = 0; + for (auto *BB : L->getBlocks()) { + for (auto &I : *BB) { + if (isa<CallInst>(I) || isa<InvokeInst>(I)) { + ImmutableCallSite CS(&I); + if (const Function *F = CS.getCalledFunction()) { + if (!isLoweredToCall(F)) + continue; + } + return; + } + SmallVector<const Value*, 4> Operands(I.value_op_begin(), + I.value_op_end()); + Cost += getUserCost(&I, Operands); + } + } + + DEBUG(dbgs() << "Cost of loop: " << Cost << "\n"); + + UP.Partial = true; + UP.Runtime = true; + UP.UnrollRemainder = true; + UP.DefaultUnrollRuntimeCount = 4; + + // Force unrolling small loops can be very useful because of the branch + // taken cost of the backedge. + if (Cost < 12) + UP.Force = true; +} diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h index 0695a4e633467..99353a3219a0a 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/lib/Target/ARM/ARMTargetTransformInfo.h @@ -1,4 +1,4 @@ -//===-- ARMTargetTransformInfo.h - ARM specific TTI -------------*- C++ -*-===// +//===- ARMTargetTransformInfo.h - ARM specific TTI --------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -6,28 +6,43 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// /// \file /// This file a TargetTransformInfo::Concept conforming object specific to the /// ARM target machine. It uses the target's detailed information to /// provide more precise answers to certain TTI queries, while letting the /// target independent and default TTI implementations handle the rest. 
-/// +// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H #define LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H #include "ARM.h" +#include "ARMSubtarget.h" #include "ARMTargetMachine.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/BasicTTIImpl.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/SubtargetFeature.h" namespace llvm { +class APInt; +class ARMTargetLowering; +class Instruction; +class Loop; +class SCEV; +class ScalarEvolution; +class Type; +class Value; + class ARMTTIImpl : public BasicTTIImplBase<ARMTTIImpl> { - typedef BasicTTIImplBase<ARMTTIImpl> BaseT; - typedef TargetTransformInfo TTI; + using BaseT = BasicTTIImplBase<ARMTTIImpl>; + using TTI = TargetTransformInfo; + friend BaseT; const ARMSubtarget *ST; @@ -158,6 +173,9 @@ public: ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace); + void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, + TTI::UnrollingPreferences &UP); + bool shouldBuildLookupTablesForConstant(Constant *C) const { // In the ROPI and RWPI relocation models we can't have pointers to global // variables or functions in constant data, so don't convert switches to @@ -172,4 +190,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_ARMTARGETTRANSFORMINFO_H diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 1129826f21f64..26fda5f22b4f2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1,4 +1,4 @@ -//===-- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions ------===// +//===- ARMAsmParser.cpp - Parse ARM assembly to MCInst instructions -------===// // // The LLVM Compiler Infrastructure // @@ -8,22 +8,23 @@ //===----------------------------------------------------------------------===// #include "ARMFeatures.h" +#include "Utils/ARMBaseInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCExpr.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" -#include "llvm/BinaryFormat/COFF.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler/MCDisassembler.h" -#include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" @@ -31,6 +32,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParserExtension.h" #include "llvm/MC/MCParser/MCAsmParserUtils.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCParser/MCTargetAsmParser.h" @@ -39,15 +41,30 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/SubtargetFeature.h" #include "llvm/Support/ARMBuildAttributes.h" #include "llvm/Support/ARMEHABI.h" +#include 
"llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/SMLoc.h" #include "llvm/Support/TargetParser.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <limits> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#define DEBUG_TYPE "asm-parser" using namespace llvm; @@ -70,15 +87,12 @@ static cl::opt<ImplicitItModeTy> ImplicitItMode( static cl::opt<bool> AddBuildAttributes("arm-add-build-attributes", cl::init(false)); -class ARMOperand; - enum VectorLaneTy { NoLanes, AllLanes, IndexedLane }; class UnwindContext { - MCAsmParser &Parser; - - typedef SmallVector<SMLoc, 4> Locs; + using Locs = SmallVector<SMLoc, 4>; + MCAsmParser &Parser; Locs FnStartLocs; Locs CantUnwindLocs; Locs PersonalityLocs; @@ -92,6 +106,7 @@ public: bool hasFnStart() const { return !FnStartLocs.empty(); } bool cantUnwind() const { return !CantUnwindLocs.empty(); } bool hasHandlerData() const { return !HandlerDataLocs.empty(); } + bool hasPersonality() const { return !(PersonalityLocs.empty() && PersonalityIndexLocs.empty()); } @@ -110,16 +125,19 @@ public: FI != FE; ++FI) Parser.Note(*FI, ".fnstart was specified here"); } + void emitCantUnwindLocNotes() const { for (Locs::const_iterator UI = CantUnwindLocs.begin(), UE = CantUnwindLocs.end(); UI != UE; ++UI) Parser.Note(*UI, ".cantunwind was specified here"); } + void emitHandlerDataLocNotes() const { for (Locs::const_iterator HI = HandlerDataLocs.begin(), HE = HandlerDataLocs.end(); HI != HE; ++HI) Parser.Note(*HI, ".handlerdata was specified here"); } + void emitPersonalityLocNotes() const { for (Locs::const_iterator PI = PersonalityLocs.begin(), PE = PersonalityLocs.end(), @@ -147,7 +165,6 @@ public: }; class ARMAsmParser : public MCTargetAsmParser { - const MCInstrInfo &MII; const MCRegisterInfo *MRI; UnwindContext UC; @@ -198,7 +215,7 @@ class ARMAsmParser : public MCTargetAsmParser { // would be legal. } ITState; - llvm::SmallVector<MCInst, 4> PendingConditionalInsts; + SmallVector<MCInst, 4> PendingConditionalInsts; void flushPendingInstructions(MCStreamer &Out) override { if (!inImplicitITBlock()) { @@ -229,9 +246,11 @@ class ARMAsmParser : public MCTargetAsmParser { bool inITBlock() { return ITState.CurPosition != ~0U; } bool inExplicitITBlock() { return inITBlock() && ITState.IsExplicit; } bool inImplicitITBlock() { return inITBlock() && !ITState.IsExplicit; } + bool lastInITBlock() { return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask); } + void forwardITPosition() { if (!inITBlock()) return; // Move to the next instruction in the IT block, if there is one. If not, @@ -260,7 +279,11 @@ class ARMAsmParser : public MCTargetAsmParser { assert(inImplicitITBlock()); assert(ITState.CurPosition == 1); ITState.CurPosition = ~0U; - return; + } + + // Return the low-subreg of a given Q register. + unsigned getDRegFromQReg(unsigned QReg) const { + return MRI->getSubReg(QReg, ARM::dsub_0); } // Get the encoding of the IT mask, as it will appear in an IT instruction. 
@@ -326,7 +349,6 @@ class ARMAsmParser : public MCTargetAsmParser { ITState.Mask = 8; ITState.CurPosition = 1; ITState.IsExplicit = false; - return; } // Create a new explicit IT block with the given condition and mask. The mask @@ -338,15 +360,16 @@ class ARMAsmParser : public MCTargetAsmParser { ITState.Mask = Mask; ITState.CurPosition = 0; ITState.IsExplicit = true; - return; } void Note(SMLoc L, const Twine &Msg, SMRange Range = None) { return getParser().Note(L, Msg, Range); } + bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) { return getParser().Warning(L, Msg, Range); } + bool Error(SMLoc L, const Twine &Msg, SMRange Range = None) { return getParser().Error(L, Msg, Range); } @@ -410,54 +433,71 @@ class ARMAsmParser : public MCTargetAsmParser { // FIXME: Can tablegen auto-generate this? return getSTI().getFeatureBits()[ARM::ModeThumb]; } + bool isThumbOne() const { return isThumb() && !getSTI().getFeatureBits()[ARM::FeatureThumb2]; } + bool isThumbTwo() const { return isThumb() && getSTI().getFeatureBits()[ARM::FeatureThumb2]; } + bool hasThumb() const { return getSTI().getFeatureBits()[ARM::HasV4TOps]; } + bool hasThumb2() const { return getSTI().getFeatureBits()[ARM::FeatureThumb2]; } + bool hasV6Ops() const { return getSTI().getFeatureBits()[ARM::HasV6Ops]; } + bool hasV6T2Ops() const { return getSTI().getFeatureBits()[ARM::HasV6T2Ops]; } + bool hasV6MOps() const { return getSTI().getFeatureBits()[ARM::HasV6MOps]; } + bool hasV7Ops() const { return getSTI().getFeatureBits()[ARM::HasV7Ops]; } + bool hasV8Ops() const { return getSTI().getFeatureBits()[ARM::HasV8Ops]; } + bool hasV8MBaseline() const { return getSTI().getFeatureBits()[ARM::HasV8MBaselineOps]; } + bool hasV8MMainline() const { return getSTI().getFeatureBits()[ARM::HasV8MMainlineOps]; } + bool has8MSecExt() const { return getSTI().getFeatureBits()[ARM::Feature8MSecExt]; } + bool hasARM() const { return !getSTI().getFeatureBits()[ARM::FeatureNoARM]; } + bool hasDSP() const { return getSTI().getFeatureBits()[ARM::FeatureDSP]; } + bool hasD16() const { return getSTI().getFeatureBits()[ARM::FeatureD16]; } + bool hasV8_1aOps() const { return getSTI().getFeatureBits()[ARM::HasV8_1aOps]; } + bool hasRAS() const { return getSTI().getFeatureBits()[ARM::FeatureRAS]; } @@ -467,7 +507,9 @@ class ARMAsmParser : public MCTargetAsmParser { uint64_t FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb)); setAvailableFeatures(FB); } + void FixModeAfterArchChange(bool WasThumb, SMLoc Loc); + bool isMClass() const { return getSTI().getFeatureBits()[ARM::FeatureMClass]; } @@ -518,6 +560,7 @@ class ARMAsmParser : public MCTargetAsmParser { bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands); bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands); bool isITBlockTerminator(MCInst &Inst) const; + void fixupGNULDRDAlias(StringRef Mnemonic, OperandVector &Operands); public: enum ARMMatchResultTy { @@ -534,7 +577,7 @@ public: ARMAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser, const MCInstrInfo &MII, const MCTargetOptions &Options) - : MCTargetAsmParser(Options, STI), MII(MII), UC(Parser) { + : MCTargetAsmParser(Options, STI, MII), UC(Parser) { MCAsmParserExtension::Initialize(Parser); // Cache the MCRegisterInfo. 
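The IT-block bookkeeping in the hunks above follows the Thumb-2 IT mask convention: the lowest set bit of the 4-bit mask terminates it, so the block covers 4 minus the index of that bit instructions. That is what lastInITBlock() computes with countTrailingZeros, and it is why an implicit block starts with Mask = 8 and CurPosition = 1, i.e. a single slot. A standalone model of that arithmetic, illustrative only and not the parser's API:

#include <cassert>
#include <cstdio>

// Illustrative only: the IT instruction's 4-bit mask is terminated by its
// lowest set bit, so the block length is 4 minus the index of that bit. This
// mirrors the parser's lastInITBlock() check
// (CurPosition == 4 - countTrailingZeros(Mask)).
static unsigned countTrailingZeros4(unsigned Mask) {
  assert(Mask != 0 && Mask < 16 && "expected a non-empty 4-bit IT mask");
  unsigned N = 0;
  while ((Mask & 1) == 0) {
    Mask >>= 1;
    ++N;
  }
  return N;
}

static unsigned itBlockLength(unsigned Mask) {
  return 4 - countTrailingZeros4(Mask);
}

int main() {
  std::printf("mask 0b1000 -> %u slot(s)\n", itBlockLength(0x8)); // implicit IT: 1
  std::printf("mask 0b0100 -> %u slot(s)\n", itBlockLength(0x4)); // 2
  std::printf("mask 0b0001 -> %u slot(s)\n", itBlockLength(0x1)); // 4
  return 0;
}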
@@ -568,13 +611,25 @@ public: uint64_t &ErrorInfo, bool MatchingInlineAsm) override; unsigned MatchInstruction(OperandVector &Operands, MCInst &Inst, - uint64_t &ErrorInfo, bool MatchingInlineAsm, - bool &EmitInITBlock, MCStreamer &Out); + SmallVectorImpl<NearMissInfo> &NearMisses, + bool MatchingInlineAsm, bool &EmitInITBlock, + MCStreamer &Out); + + struct NearMissMessage { + SMLoc Loc; + SmallString<128> Message; + }; + + const char *getCustomOperandDiag(ARMMatchResultTy MatchError); + + void FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn, + SmallVectorImpl<NearMissMessage> &NearMissesOut, + SMLoc IDLoc, OperandVector &Operands); + + void ReportNearMisses(SmallVectorImpl<NearMissInfo> &NearMisses, SMLoc IDLoc, + OperandVector &Operands); + void onLabelParsed(MCSymbol *Symbol) override; }; -} // end anonymous namespace - -namespace { /// ARMOperand - Instances of this class represent a parsed ARM machine /// operand. @@ -759,8 +814,10 @@ public: /// getStartLoc - Get the location of the first token of this operand. SMLoc getStartLoc() const override { return StartLoc; } + /// getEndLoc - Get the location of the last token of this operand. SMLoc getEndLoc() const override { return EndLoc; } + /// getLocRange - Get the range between the first and last token of this /// operand. SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); } @@ -879,6 +936,7 @@ public: } return false; } + // checks whether this operand is a signed offset which fits in a field // of specified width and scaled by a specific number of bits template<unsigned width, unsigned scale> @@ -915,6 +973,7 @@ public: else return false; return ((Val % 4) == 0) && (Val >= 0) && (Val <= 1020); } + bool isFPImm() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -931,6 +990,7 @@ public: int64_t Value = CE->getValue(); return Value >= N && Value <= M; } + template<int64_t N, int64_t M> bool isImmediateS4() const { if (!isImm()) return false; @@ -939,6 +999,7 @@ public: int64_t Value = CE->getValue(); return ((Value & 3) == 0) && Value >= N && Value <= M; } + bool isFBits16() const { return isImmediate<0, 17>(); } @@ -962,6 +1023,7 @@ public: // explicitly exclude zero. We want that to use the normal 0_508 version.
return ((Value & 3) == 0) && Value > 0 && Value <= 508; } + bool isImm0_4095Neg() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -969,18 +1031,23 @@ public: int64_t Value = -CE->getValue(); return Value > 0 && Value < 4096; } + bool isImm0_7() const { return isImmediate<0, 7>(); } + bool isImm1_16() const { return isImmediate<1, 16>(); } + bool isImm1_32() const { return isImmediate<1, 32>(); } + bool isImm8_255() const { return isImmediate<8, 255>(); } + bool isImm256_65535Expr() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -990,6 +1057,7 @@ public: int64_t Value = CE->getValue(); return Value >= 256 && Value < 65536; } + bool isImm0_65535Expr() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -999,18 +1067,23 @@ public: int64_t Value = CE->getValue(); return Value >= 0 && Value < 65536; } + bool isImm24bit() const { return isImmediate<0, 0xffffff + 1>(); } + bool isImmThumbSR() const { return isImmediate<1, 33>(); } + bool isPKHLSLImm() const { return isImmediate<0, 32>(); } + bool isPKHASRImm() const { return isImmediate<0, 33>(); } + bool isAdrLabel() const { // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. @@ -1025,6 +1098,7 @@ public: return (ARM_AM::getSOImmVal(Value) != -1 || ARM_AM::getSOImmVal(-Value) != -1); } + bool isT2SOImm() const { // If we have an immediate that's not a constant, treat it as an expression // needing a fixup. @@ -1041,6 +1115,7 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(Value) != -1; } + bool isT2SOImmNot() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1049,6 +1124,7 @@ public: return ARM_AM::getT2SOImmVal(Value) == -1 && ARM_AM::getT2SOImmVal(~Value) != -1; } + bool isT2SOImmNeg() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1058,6 +1134,7 @@ public: return ARM_AM::getT2SOImmVal(Value) == -1 && ARM_AM::getT2SOImmVal(-Value) != -1; } + bool isSetEndImm() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1065,6 +1142,7 @@ public: int64_t Value = CE->getValue(); return Value == 1 || Value == 0; } + bool isReg() const override { return Kind == k_Register; } bool isRegList() const { return Kind == k_RegisterList; } bool isDPRRegList() const { return Kind == k_DPRRegisterList; } @@ -1078,6 +1156,7 @@ public: bool isRegShiftedImm() const { return Kind == k_ShiftedImmediate; } bool isRotImm() const { return Kind == k_RotateImmediate; } bool isModImm() const { return Kind == k_ModifiedImmediate; } + bool isModImmNot() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1085,6 +1164,7 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(~Value) != -1; } + bool isModImmNeg() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1093,6 +1173,7 @@ public: return ARM_AM::getSOImmVal(Value) == -1 && ARM_AM::getSOImmVal(-Value) != -1; } + bool isThumbModImmNeg1_7() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1100,6 +1181,7 @@ public: int32_t Value = -(int32_t)CE->getValue(); return 0 < Value && Value < 8; } + bool isThumbModImmNeg8_255() const { if (!isImm()) return false; const MCConstantExpr 
*CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1107,6 +1189,7 @@ public: int32_t Value = -(int32_t)CE->getValue(); return 7 < Value && Value < 256; } + bool isConstantPoolImm() const { return Kind == k_ConstantPoolImmediate; } bool isBitfield() const { return Kind == k_BitfieldDescriptor; } bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } @@ -1129,47 +1212,58 @@ public: // Immediate offset in range [-4095, 4095]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); + return (Val > -4096 && Val < 4096) || + (Val == std::numeric_limits<int32_t>::min()); } + bool isAlignedMemory() const { return isMemNoOffset(true); } + bool isAlignedMemoryNone() const { return isMemNoOffset(false, 0); } + bool isDupAlignedMemoryNone() const { return isMemNoOffset(false, 0); } + bool isAlignedMemory16() const { if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2. return true; return isMemNoOffset(false, 0); } + bool isDupAlignedMemory16() const { if (isMemNoOffset(false, 2)) // alignment in bytes for 16-bits is 2. return true; return isMemNoOffset(false, 0); } + bool isAlignedMemory32() const { if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4. return true; return isMemNoOffset(false, 0); } + bool isDupAlignedMemory32() const { if (isMemNoOffset(false, 4)) // alignment in bytes for 32-bits is 4. return true; return isMemNoOffset(false, 0); } + bool isAlignedMemory64() const { if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. return true; return isMemNoOffset(false, 0); } + bool isDupAlignedMemory64() const { if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. return true; return isMemNoOffset(false, 0); } + bool isAlignedMemory64or128() const { if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. return true; @@ -1177,6 +1271,7 @@ public: return true; return isMemNoOffset(false, 0); } + bool isDupAlignedMemory64or128() const { if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. return true; @@ -1184,6 +1279,7 @@ public: return true; return isMemNoOffset(false, 0); } + bool isAlignedMemory64or128or256() const { if (isMemNoOffset(false, 8)) // alignment in bytes for 64-bits is 8. return true; @@ -1193,6 +1289,7 @@ public: return true; return isMemNoOffset(false, 0); } + bool isAddrMode2() const { if (!isMem() || Memory.Alignment != 0) return false; // Check for register offset. @@ -1202,14 +1299,17 @@ public: int64_t Val = Memory.OffsetImm->getValue(); return Val > -4096 && Val < 4096; } + bool isAM2OffsetImm() const { if (!isImm()) return false; // Immediate offset in range [-4095, 4095]. const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); - return (Val == INT32_MIN) || (Val > -4096 && Val < 4096); + return (Val == std::numeric_limits<int32_t>::min()) || + (Val > -4096 && Val < 4096); } + bool isAddrMode3() const { // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else @@ -1224,10 +1324,12 @@ public: // Immediate offset in range [-255, 255]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - // The #-0 offset is encoded as INT32_MIN, and we have to check - // for this too. 
- return (Val > -256 && Val < 256) || Val == INT32_MIN; + // The #-0 offset is encoded as std::numeric_limits<int32_t>::min(), and we + // have to check for this too. + return (Val > -256 && Val < 256) || + Val == std::numeric_limits<int32_t>::min(); } + bool isAM3Offset() const { if (Kind != k_Immediate && Kind != k_PostIndexRegister) return false; @@ -1237,9 +1339,11 @@ public: const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); - // Special case, #-0 is INT32_MIN. - return (Val > -256 && Val < 256) || Val == INT32_MIN; + // Special case, #-0 is std::numeric_limits<int32_t>::min(). + return (Val > -256 && Val < 256) || + Val == std::numeric_limits<int32_t>::min(); } + bool isAddrMode5() const { // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else @@ -1253,8 +1357,9 @@ public: if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) || - Val == INT32_MIN; + Val == std::numeric_limits<int32_t>::min(); } + bool isAddrMode5FP16() const { // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else @@ -1267,14 +1372,17 @@ public: // Immediate offset in range [-510, 510] and a multiple of 2. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - return (Val >= -510 && Val <= 510 && ((Val & 1) == 0)) || Val == INT32_MIN; + return (Val >= -510 && Val <= 510 && ((Val & 1) == 0)) || + Val == std::numeric_limits<int32_t>::min(); } + bool isMemTBB() const { if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::no_shift || Memory.Alignment != 0) return false; return true; } + bool isMemTBH() const { if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.ShiftType != ARM_AM::lsl || Memory.ShiftImm != 1 || @@ -1282,11 +1390,13 @@ public: return false; return true; } + bool isMemRegOffset() const { if (!isMem() || !Memory.OffsetRegNum || Memory.Alignment != 0) return false; return true; } + bool isT2MemRegOffset() const { if (!isMem() || !Memory.OffsetRegNum || Memory.isNegative || Memory.Alignment != 0 || Memory.BaseRegNum == ARM::PC) @@ -1298,6 +1408,7 @@ public: return false; return true; } + bool isMemThumbRR() const { // Thumb reg+reg addressing is simple. Just two registers, a base and // an offset. No shifts, negations or any other complicating factors. 
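Several of the addressing-mode predicates above accept std::numeric_limits<int32_t>::min() alongside the normal offset range. That sentinel is how the parser keeps the "#-0" spelling alive: the add/sub direction is stored separately from the magnitude (through ARM_AM::getAM2Opc and friends), and a plain 0 cannot express "subtract zero". A standalone sketch of the convention; encodeOffset is a hypothetical stand-in for the real operand-add methods:

#include <cstdint>
#include <cstdio>
#include <limits>
#include <utility>

// Illustrative only: split a signed assembly offset into an add/sub flag plus
// a magnitude, with INT32_MIN reserved as the marker for the "#-0" spelling
// (subtract, magnitude zero). The real parser encodes the result with
// ARM_AM::getAM2Opc/getAM3Opc; encodeOffset here is hypothetical.
static std::pair<bool /*isAdd*/, uint32_t /*magnitude*/>
encodeOffset(int32_t Val) {
  // "#-0" was stored as the sentinel INT32_MIN so it survives parsing.
  if (Val == std::numeric_limits<int32_t>::min())
    return {false, 0};
  if (Val < 0)
    return {false, static_cast<uint32_t>(-static_cast<int64_t>(Val))};
  return {true, static_cast<uint32_t>(Val)};
}

int main() {
  int32_t Inputs[] = {42, -42, 0, std::numeric_limits<int32_t>::min()};
  for (int32_t V : Inputs) {
    auto [IsAdd, Mag] = encodeOffset(V);
    std::printf("%11d -> %s #%u\n", V, IsAdd ? "add" : "sub", Mag);
  }
  return 0;
}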
@@ -1307,6 +1418,7 @@ public: return isARMLowRegister(Memory.BaseRegNum) && (!Memory.OffsetRegNum || isARMLowRegister(Memory.OffsetRegNum)); } + bool isMemThumbRIs4() const { if (!isMem() || Memory.OffsetRegNum != 0 || !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) @@ -1316,6 +1428,7 @@ public: int64_t Val = Memory.OffsetImm->getValue(); return Val >= 0 && Val <= 124 && (Val % 4) == 0; } + bool isMemThumbRIs2() const { if (!isMem() || Memory.OffsetRegNum != 0 || !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) @@ -1325,6 +1438,7 @@ public: int64_t Val = Memory.OffsetImm->getValue(); return Val >= 0 && Val <= 62 && (Val % 2) == 0; } + bool isMemThumbRIs1() const { if (!isMem() || Memory.OffsetRegNum != 0 || !isARMLowRegister(Memory.BaseRegNum) || Memory.Alignment != 0) @@ -1334,6 +1448,7 @@ public: int64_t Val = Memory.OffsetImm->getValue(); return Val >= 0 && Val <= 31; } + bool isMemThumbSPI() const { if (!isMem() || Memory.OffsetRegNum != 0 || Memory.BaseRegNum != ARM::SP || Memory.Alignment != 0) @@ -1343,6 +1458,7 @@ public: int64_t Val = Memory.OffsetImm->getValue(); return Val >= 0 && Val <= 1020 && (Val % 4) == 0; } + bool isMemImm8s4Offset() const { // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. If it is a constant, it's something else @@ -1354,9 +1470,11 @@ public: // Immediate offset a multiple of 4 in range [-1020, 1020]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - // Special case, #-0 is INT32_MIN. - return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || Val == INT32_MIN; + // Special case, #-0 is std::numeric_limits<int32_t>::min(). + return (Val >= -1020 && Val <= 1020 && (Val & 3) == 0) || + Val == std::numeric_limits<int32_t>::min(); } + bool isMemImm0_1020s4Offset() const { if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; @@ -1365,6 +1483,7 @@ public: int64_t Val = Memory.OffsetImm->getValue(); return Val >= 0 && Val <= 1020 && (Val & 3) == 0; } + bool isMemImm8Offset() const { if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; @@ -1373,8 +1492,10 @@ public: // Immediate offset in range [-255, 255]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - return (Val == INT32_MIN) || (Val > -256 && Val < 256); + return (Val == std::numeric_limits<int32_t>::min()) || + (Val > -256 && Val < 256); } + bool isMemPosImm8Offset() const { if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; @@ -1383,6 +1504,7 @@ public: int64_t Val = Memory.OffsetImm->getValue(); return Val >= 0 && Val < 256; } + bool isMemNegImm8Offset() const { if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; @@ -1391,8 +1513,10 @@ public: // Immediate offset in range [-255, -1]. if (!Memory.OffsetImm) return false; int64_t Val = Memory.OffsetImm->getValue(); - return (Val == INT32_MIN) || (Val > -256 && Val < 0); + return (Val == std::numeric_limits<int32_t>::min()) || + (Val > -256 && Val < 0); } + bool isMemUImm12Offset() const { if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; @@ -1401,6 +1525,7 @@ public: int64_t Val = Memory.OffsetImm->getValue(); return (Val >= 0 && Val < 4096); } + bool isMemImm12Offset() const { // If we have an immediate that's not a constant, treat it as a label // reference needing a fixup. 
If it is a constant, it's something else @@ -1414,27 +1539,32 @@ public: // Immediate offset in range [-4095, 4095]. if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); - return (Val > -4096 && Val < 4096) || (Val == INT32_MIN); + return (Val > -4096 && Val < 4096) || + (Val == std::numeric_limits<int32_t>::min()); } + bool isConstPoolAsmImm() const { // Delay processing of Constant Pool Immediate, this will turn into // a constant. Match no other operand return (isConstantPoolImm()); } + bool isPostIdxImm8() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); - return (Val > -256 && Val < 256) || (Val == INT32_MIN); + return (Val > -256 && Val < 256) || + (Val == std::numeric_limits<int32_t>::min()); } + bool isPostIdxImm8s4() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); if (!CE) return false; int64_t Val = CE->getValue(); return ((Val & 3) == 0 && Val >= -1020 && Val <= 1020) || - (Val == INT32_MIN); + (Val == std::numeric_limits<int32_t>::min()); } bool isMSRMask() const { return Kind == k_MSRMask; } @@ -1445,9 +1575,11 @@ public: bool isSingleSpacedVectorList() const { return Kind == k_VectorList && !VectorList.isDoubleSpaced; } + bool isDoubleSpacedVectorList() const { return Kind == k_VectorList && VectorList.isDoubleSpaced; } + bool isVecListOneD() const { if (!isSingleSpacedVectorList()) return false; return VectorList.Count == 1; @@ -1489,9 +1621,11 @@ public: bool isSingleSpacedVectorAllLanes() const { return Kind == k_VectorListAllLanes && !VectorList.isDoubleSpaced; } + bool isDoubleSpacedVectorAllLanes() const { return Kind == k_VectorListAllLanes && VectorList.isDoubleSpaced; } + bool isVecListOneDAllLanes() const { if (!isSingleSpacedVectorAllLanes()) return false; return VectorList.Count == 1; @@ -1531,9 +1665,11 @@ public: bool isSingleSpacedVectorIndexed() const { return Kind == k_VectorListIndexed && !VectorList.isDoubleSpaced; } + bool isDoubleSpacedVectorIndexed() const { return Kind == k_VectorListIndexed && VectorList.isDoubleSpaced; } + bool isVecListOneDByteIndexed() const { if (!isSingleSpacedVectorIndexed()) return false; return VectorList.Count == 1 && VectorList.LaneIndex <= 7; @@ -1628,14 +1764,20 @@ public: if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; } + bool isVectorIndex16() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 4; } + bool isVectorIndex32() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 2; } + bool isVectorIndex64() const { + if (Kind != k_VectorIndex) return false; + return VectorIndex.Val < 1; + } bool isNEONi8splat() const { if (!isImm()) return false; @@ -1711,8 +1853,10 @@ public: } return true; } + bool isNEONi16ByteReplicate() const { return isNEONByteReplicate(2); } bool isNEONi32ByteReplicate() const { return isNEONByteReplicate(4); } + bool isNEONi32vmov() const { if (isNEONByteReplicate(4)) return false; // Let it to be classified as byte-replicate case. 
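The hunks that follow add an isComplexRotation<Angle, Remainder> predicate and addComplexRotationEven/OddOperands methods, which encode the rotation immediates of the vcmla/vcadd mnemonics accepted later in this patch. Assuming the template is instantiated as <90, 0> for the even case and <180, 90> for the odd case (which is what the value/90 and (value-90)/180 conversions imply), the accepted rotations are 0/90/180/270 and 90/270 respectively. A standalone sketch of just that arithmetic:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative only: mirror the arithmetic of isComplexRotation<Angle,
// Remainder> and addComplexRotation{Even,Odd}Operands. "Even" rotations
// (0/90/180/270) are encoded as value/90; "odd" rotations (90/270) as
// (value - 90)/180.
static bool isValidRotation(int64_t Value, int64_t Angle, int64_t Remainder) {
  return Value % Angle == Remainder && Value <= 270;
}

static int64_t encodeEvenRotation(int64_t Value) {
  assert(isValidRotation(Value, 90, 0));
  return Value / 90;          // 0, 90, 180, 270 -> 0, 1, 2, 3
}

static int64_t encodeOddRotation(int64_t Value) {
  assert(isValidRotation(Value, 180, 90));
  return (Value - 90) / 180;  // 90, 270 -> 0, 1
}

int main() {
  for (int64_t R : {0, 90, 180, 270})
    std::printf("even #%lld -> %lld\n", (long long)R, (long long)encodeEvenRotation(R));
  for (int64_t R : {90, 270})
    std::printf("odd  #%lld -> %lld\n", (long long)R, (long long)encodeOddRotation(R));
  return 0;
}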
@@ -1733,6 +1877,7 @@ public: (Value >= 0x01ff && Value <= 0xffff && (Value & 0xff) == 0xff) || (Value >= 0x01ffff && Value <= 0xffffff && (Value & 0xffff) == 0xffff); } + bool isNEONi32vmovNeg() const { if (!isImm()) return false; const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); @@ -1762,6 +1907,17 @@ public: return true; } + template<int64_t Angle, int64_t Remainder> + bool isComplexRotation() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + if (!CE) return false; + uint64_t Value = CE->getValue(); + + return (Value % Angle == Remainder && Value <= 270); + } + void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. if (!Expr) @@ -2161,7 +2317,7 @@ public: if (!Memory.OffsetRegNum) { ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; // Special case for #-0 - if (Val == INT32_MIN) Val = 0; + if (Val == std::numeric_limits<int32_t>::min()) Val = 0; if (Val < 0) Val = -Val; Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift); } else { @@ -2182,7 +2338,7 @@ public: int32_t Val = CE->getValue(); ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; // Special case for #-0 - if (Val == INT32_MIN) Val = 0; + if (Val == std::numeric_limits<int32_t>::min()) Val = 0; if (Val < 0) Val = -Val; Val = ARM_AM::getAM2Opc(AddSub, Val, ARM_AM::no_shift); Inst.addOperand(MCOperand::createReg(0)); @@ -2205,7 +2361,7 @@ public: if (!Memory.OffsetRegNum) { ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; // Special case for #-0 - if (Val == INT32_MIN) Val = 0; + if (Val == std::numeric_limits<int32_t>::min()) Val = 0; if (Val < 0) Val = -Val; Val = ARM_AM::getAM3Opc(AddSub, Val); } else { @@ -2233,7 +2389,7 @@ public: int32_t Val = CE->getValue(); ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; // Special case for #-0 - if (Val == INT32_MIN) Val = 0; + if (Val == std::numeric_limits<int32_t>::min()) Val = 0; if (Val < 0) Val = -Val; Val = ARM_AM::getAM3Opc(AddSub, Val); Inst.addOperand(MCOperand::createReg(0)); @@ -2255,7 +2411,7 @@ public: int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 4 : 0; ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; // Special case for #-0 - if (Val == INT32_MIN) Val = 0; + if (Val == std::numeric_limits<int32_t>::min()) Val = 0; if (Val < 0) Val = -Val; Val = ARM_AM::getAM5Opc(AddSub, Val); Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum)); @@ -2277,7 +2433,7 @@ public: int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() / 2 : 0; ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; // Special case for #-0 - if (Val == INT32_MIN) Val = 0; + if (Val == std::numeric_limits<int32_t>::min()) Val = 0; if (Val < 0) Val = -Val; Val = ARM_AM::getAM5FP16Opc(AddSub, Val); Inst.addOperand(MCOperand::createReg(Memory.BaseRegNum)); @@ -2430,7 +2586,7 @@ public: assert(CE && "non-constant post-idx-imm8 operand!"); int Imm = CE->getValue(); bool isAdd = Imm >= 0; - if (Imm == INT32_MIN) Imm = 0; + if (Imm == std::numeric_limits<int32_t>::min()) Imm = 0; Imm = (Imm < 0 ? -Imm : Imm) | (int)isAdd << 8; Inst.addOperand(MCOperand::createImm(Imm)); } @@ -2441,7 +2597,7 @@ public: assert(CE && "non-constant post-idx-imm8s4 operand!"); int Imm = CE->getValue(); bool isAdd = Imm >= 0; - if (Imm == INT32_MIN) Imm = 0; + if (Imm == std::numeric_limits<int32_t>::min()) Imm = 0; // Immediate is scaled by 4. Imm = ((Imm < 0 ? 
-Imm : Imm) / 4) | (int)isAdd << 8; Inst.addOperand(MCOperand::createImm(Imm)); @@ -2505,6 +2661,11 @@ public: Inst.addOperand(MCOperand::createImm(getVectorIndex())); } + void addVectorIndex64Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::createImm(getVectorIndex())); + } + void addNEONi8splatOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. @@ -2562,6 +2723,7 @@ public: B |= 0xe00; // cmode = 0b1110 Inst.addOperand(MCOperand::createImm(B)); } + void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. @@ -2589,6 +2751,7 @@ public: B |= 0xe00; // cmode = 0b1110 Inst.addOperand(MCOperand::createImm(B)); } + void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The immediate encodes the type of constant as well as the value. @@ -2615,6 +2778,18 @@ public: Inst.addOperand(MCOperand::createImm(Imm | 0x1e00)); } + void addComplexRotationEvenOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::createImm(CE->getValue() / 90)); + } + + void addComplexRotationOddOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm()); + Inst.addOperand(MCOperand::createImm((CE->getValue() - 90) / 180)); + } + void print(raw_ostream &OS) const override; static std::unique_ptr<ARMOperand> CreateITMask(unsigned Mask, SMLoc S) { @@ -2762,7 +2937,7 @@ public: static std::unique_ptr<ARMOperand> CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs, SMLoc StartLoc, SMLoc EndLoc) { - assert (Regs.size() > 0 && "RegList contains no registers?"); + assert(Regs.size() > 0 && "RegList contains no registers?"); KindTy Kind = k_RegisterList; if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Regs.front().second)) @@ -2775,7 +2950,7 @@ public: array_pod_sort(Regs.begin(), Regs.end()); auto Op = make_unique<ARMOperand>(Kind); - for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator + for (SmallVectorImpl<std::pair<unsigned, unsigned>>::const_iterator I = Regs.begin(), E = Regs.end(); I != E; ++I) Op->Registers.push_back(I->second); Op->StartLoc = StartLoc; @@ -3069,7 +3244,6 @@ bool ARMAsmParser::ParseRegister(unsigned &RegNo, /// Try to parse a register name. The token must be an Identifier when called, /// and if it is a register name the token is eaten and the register number is /// returned. Otherwise return -1. -/// int ARMAsmParser::tryParseRegister() { MCAsmParser &Parser = getParser(); const AsmToken &Tok = Parser.getTok(); @@ -3223,7 +3397,6 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { return 0; } - /// Try to parse a register name. The token must be an Identifier when called. /// If it's a register, an AsmOperand is created. Another AsmOperand is created /// if there is a "writeback". 'true' if it's not a register. @@ -3232,13 +3405,13 @@ int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { /// parse for a specific register type. 
bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) { MCAsmParser &Parser = getParser(); - const AsmToken &RegTok = Parser.getTok(); + SMLoc RegStartLoc = Parser.getTok().getLoc(); + SMLoc RegEndLoc = Parser.getTok().getEndLoc(); int RegNo = tryParseRegister(); if (RegNo == -1) return true; - Operands.push_back(ARMOperand::CreateReg(RegNo, RegTok.getLoc(), - RegTok.getEndLoc())); + Operands.push_back(ARMOperand::CreateReg(RegNo, RegStartLoc, RegEndLoc)); const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { @@ -3333,25 +3506,7 @@ ARMAsmParser::parseITCondCode(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) return MatchOperand_NoMatch; - unsigned CC = StringSwitch<unsigned>(Tok.getString().lower()) - .Case("eq", ARMCC::EQ) - .Case("ne", ARMCC::NE) - .Case("hs", ARMCC::HS) - .Case("cs", ARMCC::HS) - .Case("lo", ARMCC::LO) - .Case("cc", ARMCC::LO) - .Case("mi", ARMCC::MI) - .Case("pl", ARMCC::PL) - .Case("vs", ARMCC::VS) - .Case("vc", ARMCC::VC) - .Case("hi", ARMCC::HI) - .Case("ls", ARMCC::LS) - .Case("ge", ARMCC::GE) - .Case("lt", ARMCC::LT) - .Case("gt", ARMCC::GT) - .Case("le", ARMCC::LE) - .Case("al", ARMCC::AL) - .Default(~0U); + unsigned CC = ARMCondCodeFromString(Tok.getString()); if (CC == ~0U) return MatchOperand_NoMatch; Parser.Lex(); // Eat the token. @@ -3461,29 +3616,6 @@ static unsigned getNextRegister(unsigned Reg) { } } -// Return the low-subreg of a given Q register. -static unsigned getDRegFromQReg(unsigned QReg) { - switch (QReg) { - default: llvm_unreachable("expected a Q register!"); - case ARM::Q0: return ARM::D0; - case ARM::Q1: return ARM::D2; - case ARM::Q2: return ARM::D4; - case ARM::Q3: return ARM::D6; - case ARM::Q4: return ARM::D8; - case ARM::Q5: return ARM::D10; - case ARM::Q6: return ARM::D12; - case ARM::Q7: return ARM::D14; - case ARM::Q8: return ARM::D16; - case ARM::Q9: return ARM::D18; - case ARM::Q10: return ARM::D20; - case ARM::Q11: return ARM::D22; - case ARM::Q12: return ARM::D24; - case ARM::Q13: return ARM::D26; - case ARM::Q14: return ARM::D28; - case ARM::Q15: return ARM::D30; - } -} - /// Parse a register list. bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { MCAsmParser &Parser = getParser(); @@ -3892,7 +4024,6 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) { &ARMMCRegisterClasses[ARM::DPairSpcRegClassID]; FirstReg = MRI->getMatchingSuperReg(FirstReg, ARM::dsub_0, RC); } - Operands.push_back(ARMOperand::CreateVectorList(FirstReg, Count, (Spacing == 2), S, E)); break; @@ -4058,7 +4189,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { unsigned IFlags = 0; if (IFlagsStr != "none") { for (int i = 0, e = IFlagsStr.size(); i != e; ++i) { - unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1)) + unsigned Flag = StringSwitch<unsigned>(IFlagsStr.substr(i, 1).lower()) .Case("a", ARM_PROC::A) .Case("i", ARM_PROC::I) .Case("f", ARM_PROC::F) @@ -4089,81 +4220,14 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { StringRef Mask = Tok.getString(); if (isMClass()) { - // See ARMv6-M 10.1.1 - std::string Name = Mask.lower(); - unsigned FlagsVal = StringSwitch<unsigned>(Name) - // Note: in the documentation: - // ARM deprecates using MSR APSR without a _<bits> qualifier as an alias - // for MSR APSR_nzcvq. - // but we do make it an alias here. This is so to get the "mask encoding" - // bits correct on MSR APSR writes. 
- // - // FIXME: Note the 0xc00 "mask encoding" bits version of the registers - // should really only be allowed when writing a special register. Note - // they get dropped in the MRS instruction reading a special register as - // the SYSm field is only 8 bits. - .Case("apsr", 0x800) - .Case("apsr_nzcvq", 0x800) - .Case("apsr_g", 0x400) - .Case("apsr_nzcvqg", 0xc00) - .Case("iapsr", 0x801) - .Case("iapsr_nzcvq", 0x801) - .Case("iapsr_g", 0x401) - .Case("iapsr_nzcvqg", 0xc01) - .Case("eapsr", 0x802) - .Case("eapsr_nzcvq", 0x802) - .Case("eapsr_g", 0x402) - .Case("eapsr_nzcvqg", 0xc02) - .Case("xpsr", 0x803) - .Case("xpsr_nzcvq", 0x803) - .Case("xpsr_g", 0x403) - .Case("xpsr_nzcvqg", 0xc03) - .Case("ipsr", 0x805) - .Case("epsr", 0x806) - .Case("iepsr", 0x807) - .Case("msp", 0x808) - .Case("psp", 0x809) - .Case("primask", 0x810) - .Case("basepri", 0x811) - .Case("basepri_max", 0x812) - .Case("faultmask", 0x813) - .Case("control", 0x814) - .Case("msplim", 0x80a) - .Case("psplim", 0x80b) - .Case("msp_ns", 0x888) - .Case("psp_ns", 0x889) - .Case("msplim_ns", 0x88a) - .Case("psplim_ns", 0x88b) - .Case("primask_ns", 0x890) - .Case("basepri_ns", 0x891) - .Case("basepri_max_ns", 0x892) - .Case("faultmask_ns", 0x893) - .Case("control_ns", 0x894) - .Case("sp_ns", 0x898) - .Default(~0U); - - if (FlagsVal == ~0U) - return MatchOperand_NoMatch; - - if (!hasDSP() && (FlagsVal & 0x400)) - // The _g and _nzcvqg versions are only valid if the DSP extension is - // available. + auto TheReg = ARMSysReg::lookupMClassSysRegByName(Mask.lower()); + if (!TheReg || !TheReg->hasRequiredFeatures(getSTI().getFeatureBits())) return MatchOperand_NoMatch; - if (!hasV7Ops() && FlagsVal >= 0x811 && FlagsVal <= 0x813) - // basepri, basepri_max and faultmask only valid for V7m. - return MatchOperand_NoMatch; - - if (!has8MSecExt() && (FlagsVal == 0x80a || FlagsVal == 0x80b || - (FlagsVal > 0x814 && FlagsVal < 0xc00))) - return MatchOperand_NoMatch; - - if (!hasV8MMainline() && (FlagsVal == 0x88a || FlagsVal == 0x88b || - (FlagsVal > 0x890 && FlagsVal <= 0x893))) - return MatchOperand_NoMatch; + unsigned SYSmvalue = TheReg->Encoding & 0xFFF; Parser.Lex(); // Eat identifier token. - Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S)); + Operands.push_back(ARMOperand::CreateMSRMask(SYSmvalue, S)); return MatchOperand_Success; } @@ -4241,46 +4305,10 @@ ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) { return MatchOperand_NoMatch; StringRef RegName = Tok.getString(); - // The values here come from B9.2.3 of the ARM ARM, where bits 4-0 are SysM - // and bit 5 is R. 
- unsigned Encoding = StringSwitch<unsigned>(RegName.lower()) - .Case("r8_usr", 0x00) - .Case("r9_usr", 0x01) - .Case("r10_usr", 0x02) - .Case("r11_usr", 0x03) - .Case("r12_usr", 0x04) - .Case("sp_usr", 0x05) - .Case("lr_usr", 0x06) - .Case("r8_fiq", 0x08) - .Case("r9_fiq", 0x09) - .Case("r10_fiq", 0x0a) - .Case("r11_fiq", 0x0b) - .Case("r12_fiq", 0x0c) - .Case("sp_fiq", 0x0d) - .Case("lr_fiq", 0x0e) - .Case("lr_irq", 0x10) - .Case("sp_irq", 0x11) - .Case("lr_svc", 0x12) - .Case("sp_svc", 0x13) - .Case("lr_abt", 0x14) - .Case("sp_abt", 0x15) - .Case("lr_und", 0x16) - .Case("sp_und", 0x17) - .Case("lr_mon", 0x1c) - .Case("sp_mon", 0x1d) - .Case("elr_hyp", 0x1e) - .Case("sp_hyp", 0x1f) - .Case("spsr_fiq", 0x2e) - .Case("spsr_irq", 0x30) - .Case("spsr_svc", 0x32) - .Case("spsr_abt", 0x34) - .Case("spsr_und", 0x36) - .Case("spsr_mon", 0x3c) - .Case("spsr_hyp", 0x3e) - .Default(~0U); - - if (Encoding == ~0U) + auto TheReg = ARMBankedReg::lookupBankedRegByName(RegName.lower()); + if (!TheReg) return MatchOperand_NoMatch; + unsigned Encoding = TheReg->Encoding; Parser.Lex(); // Eat identifier token. Operands.push_back(ARMOperand::CreateBankedReg(Encoding, S)); @@ -4753,10 +4781,11 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) { Error(S, "constant expression expected"); return MatchOperand_ParseFail; } - // Negative zero is encoded as the flag value INT32_MIN. + // Negative zero is encoded as the flag value + // std::numeric_limits<int32_t>::min(). int32_t Val = CE->getValue(); if (isNegative && Val == 0) - Val = INT32_MIN; + Val = std::numeric_limits<int32_t>::min(); Operands.push_back( ARMOperand::CreateImm(MCConstantExpr::create(Val, getContext()), S, E)); @@ -4764,7 +4793,6 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) { return MatchOperand_Success; } - bool haveEaten = false; bool isAdd = true; if (Tok.is(AsmToken::Plus)) { @@ -4986,10 +5014,12 @@ bool ARMAsmParser::parseMemory(OperandVector &Operands) { if (!CE) return Error (E, "constant expression expected"); - // If the constant was #-0, represent it as INT32_MIN. + // If the constant was #-0, represent it as + // std::numeric_limits<int32_t>::min(). int32_t Val = CE->getValue(); if (isNegative && Val == 0) - CE = MCConstantExpr::create(INT32_MIN, getContext()); + CE = MCConstantExpr::create(std::numeric_limits<int32_t>::min(), + getContext()); // Now we should have the closing ']' if (Parser.getTok().isNot(AsmToken::RBrac)) @@ -5067,7 +5097,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, SMLoc Loc = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) - return true; + return Error(Loc, "illegal shift operator"); StringRef ShiftName = Tok.getString(); if (ShiftName == "lsl" || ShiftName == "LSL" || ShiftName == "asl" || ShiftName == "ASL") @@ -5270,7 +5300,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { case AsmToken::LCurly: return parseRegisterList(Operands); case AsmToken::Dollar: - case AsmToken::Hash: { + case AsmToken::Hash: // #42 -> immediate. 
S = Parser.getTok().getLoc(); Parser.Lex(); @@ -5284,7 +5314,8 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { if (CE) { int32_t Val = CE->getValue(); if (isNegative && Val == 0) - ImmVal = MCConstantExpr::create(INT32_MIN, getContext()); + ImmVal = MCConstantExpr::create(std::numeric_limits<int32_t>::min(), + getContext()); } E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); @@ -5301,7 +5332,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { } // w/ a ':' after the '#', it's just like a plain ':'. LLVM_FALLTHROUGH; - } + case AsmToken::Colon: { S = Parser.getTok().getLoc(); // ":lower16:" and ":upper16:" expression prefixes @@ -5450,7 +5481,9 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || Mnemonic == "vrintm" || Mnemonic == "hvc" || Mnemonic.startswith("vsel") || Mnemonic == "vins" || Mnemonic == "vmovx" || - Mnemonic == "bxns" || Mnemonic == "blxns") + Mnemonic == "bxns" || Mnemonic == "blxns" || + Mnemonic == "vudot" || Mnemonic == "vsdot" || + Mnemonic == "vcmla" || Mnemonic == "vcadd") return Mnemonic; // First, split out any predication code. Ignore mnemonics we know aren't @@ -5459,25 +5492,7 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, Mnemonic != "muls" && Mnemonic != "smlals" && Mnemonic != "smulls" && Mnemonic != "umlals" && Mnemonic != "umulls" && Mnemonic != "lsls" && Mnemonic != "sbcs" && Mnemonic != "rscs") { - unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2)) - .Case("eq", ARMCC::EQ) - .Case("ne", ARMCC::NE) - .Case("hs", ARMCC::HS) - .Case("cs", ARMCC::HS) - .Case("lo", ARMCC::LO) - .Case("cc", ARMCC::LO) - .Case("mi", ARMCC::MI) - .Case("pl", ARMCC::PL) - .Case("vs", ARMCC::VS) - .Case("vc", ARMCC::VC) - .Case("hi", ARMCC::HI) - .Case("ls", ARMCC::LS) - .Case("ge", ARMCC::GE) - .Case("lt", ARMCC::LT) - .Case("gt", ARMCC::GT) - .Case("le", ARMCC::LE) - .Case("al", ARMCC::AL) - .Default(~0U); + unsigned CC = ARMCondCodeFromString(Mnemonic.substr(Mnemonic.size()-2)); if (CC != ~0U) { Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2); PredicationCode = CC; @@ -5556,7 +5571,9 @@ void ARMAsmParser::getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, Mnemonic.startswith("aes") || Mnemonic == "hvc" || Mnemonic == "setpan" || Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || (FullInst.startswith("vmull") && FullInst.endswith(".p64")) || - Mnemonic == "vmovx" || Mnemonic == "vins") { + Mnemonic == "vmovx" || Mnemonic == "vins" || + Mnemonic == "vudot" || Mnemonic == "vsdot" || + Mnemonic == "vcmla" || Mnemonic == "vcadd") { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { @@ -5767,8 +5784,6 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, !inITBlock())) return true; - - // Register-register 'add/sub' for thumb does not have a cc_out operand // when it's an ADD/SUB SP, #imm. Be lenient on count since there's also // the "add/sub SP, SP, #imm" version. 
If the follow-up operands aren't @@ -5789,9 +5804,9 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands) { - // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON + // VRINT{Z, X} have a predicate operand in VFP, but not in NEON unsigned RegIdx = 3; - if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") && + if ((Mnemonic == "vrintz" || Mnemonic == "vrintx") && (static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32" || static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f16")) { if (static_cast<ARMOperand &>(*Operands[3]).isToken() && @@ -5824,39 +5839,59 @@ static bool isDataTypeToken(StringRef Tok) { static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); } + static void applyMnemonicAliases(StringRef &Mnemonic, uint64_t Features, unsigned VariantID); -static bool RequiresVFPRegListValidation(StringRef Inst, - bool &AcceptSinglePrecisionOnly, - bool &AcceptDoublePrecisionOnly) { - if (Inst.size() < 7) - return false; +// The GNU assembler has aliases of ldrd and strd with the second register +// omitted. We don't have a way to do that in tablegen, so fix it up here. +// +// We have to be careful to not emit an invalid Rt2 here, because the rest of +// the assmebly parser could then generate confusing diagnostics refering to +// it. If we do find anything that prevents us from doing the transformation we +// bail out, and let the assembly parser report an error on the instruction as +// it is written. +void ARMAsmParser::fixupGNULDRDAlias(StringRef Mnemonic, + OperandVector &Operands) { + if (Mnemonic != "ldrd" && Mnemonic != "strd") + return; + if (Operands.size() < 4) + return; - if (Inst.startswith("fldm") || Inst.startswith("fstm")) { - StringRef AddressingMode = Inst.substr(4, 2); - if (AddressingMode == "ia" || AddressingMode == "db" || - AddressingMode == "ea" || AddressingMode == "fd") { - AcceptSinglePrecisionOnly = Inst[6] == 's'; - AcceptDoublePrecisionOnly = Inst[6] == 'd' || Inst[6] == 'x'; - return true; - } + ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); + ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); + + if (!Op2.isReg()) + return; + if (!Op3.isMem()) + return; + + const MCRegisterClass &GPR = MRI->getRegClass(ARM::GPRRegClassID); + if (!GPR.contains(Op2.getReg())) + return; + + unsigned RtEncoding = MRI->getEncodingValue(Op2.getReg()); + if (!isThumb() && (RtEncoding & 1)) { + // In ARM mode, the registers must be from an aligned pair, this + // restriction does not apply in Thumb mode. + return; } + if (Op2.getReg() == ARM::PC) + return; + unsigned PairedReg = GPR.getRegister(RtEncoding + 1); + if (!PairedReg || PairedReg == ARM::PC || + (PairedReg == ARM::SP && !hasV8Ops())) + return; - return false; + Operands.insert( + Operands.begin() + 3, + ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc())); } /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, OperandVector &Operands) { MCAsmParser &Parser = getParser(); - // FIXME: Can this be done via tablegen in some fashion? 
- bool RequireVFPRegisterListCheck; - bool AcceptSinglePrecisionOnly; - bool AcceptDoublePrecisionOnly; - RequireVFPRegisterListCheck = - RequiresVFPRegListValidation(Name, AcceptSinglePrecisionOnly, - AcceptDoublePrecisionOnly); // Apply mnemonic aliases before doing anything else, as the destination // mnemonic may include suffices and we want to handle them normally. @@ -6014,16 +6049,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list")) return true; - if (RequireVFPRegisterListCheck) { - ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back()); - if (AcceptSinglePrecisionOnly && !Op.isSPRRegList()) - return Error(Op.getStartLoc(), - "VFP/Neon single precision register expected"); - if (AcceptDoublePrecisionOnly && !Op.isDPRRegList()) - return Error(Op.getStartLoc(), - "VFP/Neon double precision register expected"); - } - tryConvertingToTwoOperandForm(Mnemonic, CarrySetting, Operands); // Some instructions, mostly Thumb, have forms for the same mnemonic that @@ -6039,7 +6064,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Some instructions have the same mnemonic, but don't always // have a predicate. Distinguish them here and delete the // predicate if needed. - if (shouldOmitPredicateOperand(Mnemonic, Operands)) + if (PredicationCode == ARMCC::AL && + shouldOmitPredicateOperand(Mnemonic, Operands)) Operands.erase(Operands.begin() + 1); // ARM mode 'blx' need special handling, as the register operand version @@ -6088,25 +6114,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } } - // GNU Assembler extension (compatibility) - if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { - ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); - ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); - if (Op3.isMem()) { - assert(Op2.isReg() && "expected register argument"); - - unsigned SuperReg = MRI->getMatchingSuperReg( - Op2.getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID)); - - assert(SuperReg && "expected register pair"); - - unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1); - - Operands.insert( - Operands.begin() + 3, - ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc())); - } - } + // GNU Assembler extension (compatibility). + fixupGNULDRDAlias(Mnemonic, Operands); // FIXME: As said above, this is all a pretty gross hack. This instruction // does not fit with other "subs" and tblgen. 
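As a minimal illustration of the GNU-style alias handled by the new fixupGNULDRDAlias above (register choices here are illustrative, not taken from the patch), the parser now synthesizes the implied second register instead of relying on register-pair super-register queries:

    @ two-operand GNU alias as written by the user
    ldrd    r0, [r2]
    @ operand list after fixupGNULDRDAlias inserts the paired register
    ldrd    r0, r1, [r2]

If the pairing would be invalid (odd first register in ARM mode, PC, or SP before ARMv8), the fixup bails out and leaves the operands untouched so the matcher can report its normal diagnostic.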
@@ -6163,7 +6172,6 @@ static bool instIsBreakpoint(const MCInst &Inst) { Inst.getOpcode() == ARM::BKPT || Inst.getOpcode() == ARM::tHLT || Inst.getOpcode() == ARM::HLT; - } bool ARMAsmParser::validatetLDMRegList(const MCInst &Inst, @@ -6431,7 +6439,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, case ARM::t2LDMIA_UPD: case ARM::t2LDMDB_UPD: case ARM::t2STMIA_UPD: - case ARM::t2STMDB_UPD: { + case ARM::t2STMDB_UPD: if (listContainsReg(Inst, 3, Inst.getOperand(0).getReg())) return Error(Operands.back()->getStartLoc(), "writeback register not allowed in register list"); @@ -6444,7 +6452,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return true; } break; - } + case ARM::sysLDMIA_UPD: case ARM::sysLDMDA_UPD: case ARM::sysLDMDB_UPD: @@ -6460,7 +6468,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, case ARM::sysSTMIB_UPD: return Error(Operands[2]->getStartLoc(), "system STM cannot have writeback register"); - case ARM::tMUL: { + case ARM::tMUL: // The second source operand must be the same register as the destination // operand. // @@ -6477,7 +6485,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "destination register must match source register"); } break; - } + // Like for ldm/stm, push and pop have hi-reg handling version in Thumb2, // so only issue a diagnostic for thumb1. The instructions will be // switched to the t2 encodings in processInstruction() if necessary. @@ -6520,7 +6528,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, return true; break; } - case ARM::tADDrSP: { + case ARM::tADDrSP: // If the non-SP source operand and the destination operand are not the // same, we need thumb2 (for the wide encoding), or we have an error. if (!isThumbTwo() && @@ -6529,7 +6537,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, "source register must be the same as destination"); } break; - } + // Final range checking for Thumb unconditional branch instructions. case ARM::tB: if (!(static_cast<ARMOperand &>(*Operands[2])).isSignedOffset<11, 1>()) @@ -6584,7 +6592,7 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, break; } case ARM::HINT: - case ARM::t2HINT: { + case ARM::t2HINT: if (hasRAS()) { // ESB is not predicable (pred must be AL) unsigned Imm8 = Inst.getOperand(0).getImm(); @@ -6597,7 +6605,6 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, // Without the RAS extension, this behaves as any other unallocated hint. break; } - } return false; } @@ -8123,7 +8130,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // Handle encoding choice for the shift-immediate instructions. case ARM::t2LSLri: case ARM::t2LSRri: - case ARM::t2ASRri: { + case ARM::t2ASRri: if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && @@ -8148,7 +8155,6 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, return true; } return false; - } // Handle the Thumb2 mode MOV complex aliases. case ARM::t2MOVsr: @@ -8468,7 +8474,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, Inst = TmpInst; return true; } - case ARM::tADDrSP: { + case ARM::tADDrSP: // If the non-SP source operand and the destination operand are not the // same, we need to use the 32-bit encoding if it's available. if (Inst.getOperand(0).getReg() != Inst.getOperand(2).getReg()) { @@ -8477,7 +8483,6 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, return true; } break; - } case ARM::tB: // A Thumb conditional branch outside of an IT block is a tBcc. 
if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) { @@ -8520,7 +8525,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, (!listContainsBase && !hasWritebackToken) || (listContainsBase && hasWritebackToken)) { // 16-bit encoding isn't sufficient. Switch to the 32-bit version. - assert (isThumbTwo()); + assert(isThumbTwo()); Inst.setOpcode(hasWritebackToken ? ARM::t2LDMIA_UPD : ARM::t2LDMIA); // If we're switching to the updating version, we need to insert // the writeback tied operand. @@ -8539,7 +8544,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, bool listContainsBase; if (checkLowRegisterList(Inst, 4, Rn, 0, listContainsBase)) { // 16-bit encoding isn't sufficient. Switch to the 32-bit version. - assert (isThumbTwo()); + assert(isThumbTwo()); Inst.setOpcode(ARM::t2STMIA_UPD); return true; } @@ -8552,7 +8557,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, // should have generated an error in validateInstruction(). if (!checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase)) return false; - assert (isThumbTwo()); + assert(isThumbTwo()); Inst.setOpcode(ARM::t2LDMIA_UPD); // Add the base register and writeback operands. Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP)); @@ -8563,14 +8568,14 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, bool listContainsBase; if (!checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase)) return false; - assert (isThumbTwo()); + assert(isThumbTwo()); Inst.setOpcode(ARM::t2STMDB_UPD); // Add the base register and writeback operands. Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP)); Inst.insert(Inst.begin(), MCOperand::createReg(ARM::SP)); return true; } - case ARM::t2MOVi: { + case ARM::t2MOVi: // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && @@ -8590,8 +8595,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, return true; } break; - } - case ARM::t2MOVr: { + + case ARM::t2MOVr: // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && @@ -8610,11 +8615,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, return true; } break; - } + case ARM::t2SXTH: case ARM::t2SXTB: case ARM::t2UXTH: - case ARM::t2UXTB: { + case ARM::t2UXTB: // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. if (isARMLowRegister(Inst.getOperand(0).getReg()) && @@ -8640,7 +8645,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, return true; } break; - } + case ARM::MOVsi: { ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(Inst.getOperand(2).getImm()); // rrx shifts and asr/lsr of #32 is encoded as 0 @@ -8714,7 +8719,6 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, case ARM::t2SBCrr: case ARM::t2RORrr: case ARM::t2BICrr: - { // Assemblers should use the narrow encodings of these instructions when permissible. if ((isARMLowRegister(Inst.getOperand(1).getReg()) && isARMLowRegister(Inst.getOperand(2).getReg())) && @@ -8743,12 +8747,11 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, return true; } return false; - } + case ARM::t2ANDrr: case ARM::t2EORrr: case ARM::t2ADCrr: case ARM::t2ORRrr: - { // Assemblers should use the narrow encodings of these instructions when permissible. // These instructions are special in that they are commutable, so shorter encodings // are available more often. 
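For context, a hedged sketch of the narrowing enabled for the commutable cases (Thumb2, low registers, and flag-setting outside an IT block assumed; registers chosen for illustration): because AND is commutable, an instruction whose destination matches its second source can still be flipped into the 16-bit encoding, whereas a high register forces the wide form:

    ands    r0, r1, r0      @ commutable, so equivalent to ands r0, r0, r1 and eligible for the 16-bit encoding
    and.w   r0, r0, r8      @ r8 is a high register, so the 32-bit encoding is required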
@@ -8784,7 +8787,6 @@ bool ARMAsmParser::processInstruction(MCInst &Inst, } return false; } - } return false; } @@ -8848,6 +8850,12 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { return Match_RequiresV8; } + // Use of SP for VMRS/VMSR is only allowed in ARM mode with the exception of + // ARMv8-A. + if ((Inst.getOpcode() == ARM::VMRS || Inst.getOpcode() == ARM::VMSR) && + Inst.getOperand(0).getReg() == ARM::SP && (isThumb() && !hasV8Ops())) + return Match_InvalidOperand; + for (unsigned I = 0; I < MCID.NumOperands; ++I) if (MCID.OpInfo[I].RegClass == ARM::rGPRRegClassID) { // rGPRRegClass excludes PC, and also excluded SP before ARMv8 @@ -8861,19 +8869,22 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { } namespace llvm { + template <> inline bool IsCPSRDead<MCInst>(const MCInst *Instr) { return true; // In an assembly source, no need to second-guess } -} + +} // end namespace llvm // Returns true if Inst is unpredictable if it is in and IT block, but is not // the last instruction in the block. bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); - // All branch & call instructions terminate IT blocks. - if (MCID.isTerminator() || MCID.isCall() || MCID.isReturn() || - MCID.isBranch() || MCID.isIndirectBranch()) + // All branch & call instructions terminate IT blocks with the exception of + // SVC. + if (MCID.isTerminator() || (MCID.isCall() && Inst.getOpcode() != ARM::tSVC) || + MCID.isReturn() || MCID.isBranch() || MCID.isIndirectBranch()) return true; // Any arithmetic instruction which writes to the PC also terminates the IT @@ -8909,19 +8920,19 @@ bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const { } unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, - uint64_t &ErrorInfo, + SmallVectorImpl<NearMissInfo> &NearMisses, bool MatchingInlineAsm, bool &EmitInITBlock, MCStreamer &Out) { // If we can't use an implicit IT block here, just match as normal. if (inExplicitITBlock() || !isThumbTwo() || !useImplicitITThumb()) - return MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + return MatchInstructionImpl(Operands, Inst, &NearMisses, MatchingInlineAsm); // Try to match the instruction in an extension of the current IT block (if // there is one). if (inImplicitITBlock()) { extendImplicitITBlock(ITState.Cond); - if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) == + if (MatchInstructionImpl(Operands, Inst, nullptr, MatchingInlineAsm) == Match_Success) { // The match succeded, but we still have to check that the instruction is // valid in this implicit IT block. @@ -8947,7 +8958,7 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, // Finish the current IT block, and try to match outside any IT block. flushPendingInstructions(Out); unsigned PlainMatchResult = - MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm); + MatchInstructionImpl(Operands, Inst, &NearMisses, MatchingInlineAsm); if (PlainMatchResult == Match_Success) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); if (MCID.isPredicable()) { @@ -8974,7 +8985,7 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, // condition, so we create an IT block with a dummy condition, and fix it up // once we know the actual condition. 
startImplicitITBlock(); - if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) == + if (MatchInstructionImpl(Operands, Inst, nullptr, MatchingInlineAsm) == Match_Success) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); if (MCID.isPredicable()) { @@ -8993,7 +9004,8 @@ unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst, return PlainMatchResult; } -std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS); +static std::string ARMMnemonicSpellCheck(StringRef S, uint64_t FBS, + unsigned VariantID = 0); static const char *getSubtargetFeatureName(uint64_t Val); bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -9004,16 +9016,10 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, unsigned MatchResult; bool PendConditionalInstruction = false; - MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm, + SmallVector<NearMissInfo, 4> NearMisses; + MatchResult = MatchInstruction(Operands, Inst, NearMisses, MatchingInlineAsm, PendConditionalInstruction, Out); - SMLoc ErrorLoc; - if (ErrorInfo < Operands.size()) { - ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); - if (ErrorLoc == SMLoc()) - ErrorLoc = IDLoc; - } - switch (MatchResult) { case Match_Success: // Context sensitive operand constraints aren't handled by the matcher, @@ -9061,33 +9067,9 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, Out.EmitInstruction(Inst, getSTI()); } return false; - case Match_MissingFeature: { - assert(ErrorInfo && "Unknown missing feature!"); - // Special case the error message for the very common case where only - // a single subtarget feature is missing (Thumb vs. ARM, e.g.). - std::string Msg = "instruction requires:"; - uint64_t Mask = 1; - for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) { - if (ErrorInfo & Mask) { - Msg += " "; - Msg += getSubtargetFeatureName(ErrorInfo & Mask); - } - Mask <<= 1; - } - return Error(IDLoc, Msg); - } - case Match_InvalidOperand: { - SMLoc ErrorLoc = IDLoc; - if (ErrorInfo != ~0ULL) { - if (ErrorInfo >= Operands.size()) - return Error(IDLoc, "too few operands for instruction"); - - ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - } - - return Error(ErrorLoc, "invalid operand for instruction"); - } + case Match_NearMisses: + ReportNearMisses(NearMisses, IDLoc, Operands); + return true; case Match_MnemonicFail: { uint64_t FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); std::string Suggestion = ARMMnemonicSpellCheck( @@ -9095,100 +9077,6 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "invalid instruction" + Suggestion, ((ARMOperand &)*Operands[0]).getLocRange()); } - case Match_RequiresNotITBlock: - return Error(IDLoc, "flag setting instruction only valid outside IT block"); - case Match_RequiresITBlock: - return Error(IDLoc, "instruction only valid inside IT block"); - case Match_RequiresV6: - return Error(IDLoc, "instruction variant requires ARMv6 or later"); - case Match_RequiresThumb2: - return Error(IDLoc, "instruction variant requires Thumb2"); - case Match_RequiresV8: - return Error(IDLoc, "instruction variant requires ARMv8 or later"); - case Match_RequiresFlagSetting: - return Error(IDLoc, "no flag-preserving variant of this instruction available"); - case Match_ImmRange0_1: - return Error(ErrorLoc, "immediate operand must be in the range [0,1]"); - case Match_ImmRange0_3: - return 
Error(ErrorLoc, "immediate operand must be in the range [0,3]"); - case Match_ImmRange0_7: - return Error(ErrorLoc, "immediate operand must be in the range [0,7]"); - case Match_ImmRange0_15: - return Error(ErrorLoc, "immediate operand must be in the range [0,15]"); - case Match_ImmRange0_31: - return Error(ErrorLoc, "immediate operand must be in the range [0,31]"); - case Match_ImmRange0_32: - return Error(ErrorLoc, "immediate operand must be in the range [0,32]"); - case Match_ImmRange0_63: - return Error(ErrorLoc, "immediate operand must be in the range [0,63]"); - case Match_ImmRange0_239: - return Error(ErrorLoc, "immediate operand must be in the range [0,239]"); - case Match_ImmRange0_255: - return Error(ErrorLoc, "immediate operand must be in the range [0,255]"); - case Match_ImmRange0_4095: - return Error(ErrorLoc, "immediate operand must be in the range [0,4095]"); - case Match_ImmRange0_65535: - return Error(ErrorLoc, "immediate operand must be in the range [0,65535]"); - case Match_ImmRange1_7: - return Error(ErrorLoc, "immediate operand must be in the range [1,7]"); - case Match_ImmRange1_8: - return Error(ErrorLoc, "immediate operand must be in the range [1,8]"); - case Match_ImmRange1_15: - return Error(ErrorLoc, "immediate operand must be in the range [1,15]"); - case Match_ImmRange1_16: - return Error(ErrorLoc, "immediate operand must be in the range [1,16]"); - case Match_ImmRange1_31: - return Error(ErrorLoc, "immediate operand must be in the range [1,31]"); - case Match_ImmRange1_32: - return Error(ErrorLoc, "immediate operand must be in the range [1,32]"); - case Match_ImmRange1_64: - return Error(ErrorLoc, "immediate operand must be in the range [1,64]"); - case Match_ImmRange8_8: - return Error(ErrorLoc, "immediate operand must be 8."); - case Match_ImmRange16_16: - return Error(ErrorLoc, "immediate operand must be 16."); - case Match_ImmRange32_32: - return Error(ErrorLoc, "immediate operand must be 32."); - case Match_ImmRange256_65535: - return Error(ErrorLoc, "immediate operand must be in the range [255,65535]"); - case Match_ImmRange0_16777215: - return Error(ErrorLoc, "immediate operand must be in the range [0,0xffffff]"); - case Match_AlignedMemoryRequiresNone: - case Match_DupAlignedMemoryRequiresNone: - case Match_AlignedMemoryRequires16: - case Match_DupAlignedMemoryRequires16: - case Match_AlignedMemoryRequires32: - case Match_DupAlignedMemoryRequires32: - case Match_AlignedMemoryRequires64: - case Match_DupAlignedMemoryRequires64: - case Match_AlignedMemoryRequires64or128: - case Match_DupAlignedMemoryRequires64or128: - case Match_AlignedMemoryRequires64or128or256: - { - SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getAlignmentLoc(); - if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; - switch (MatchResult) { - default: - llvm_unreachable("Missing Match_Aligned type"); - case Match_AlignedMemoryRequiresNone: - case Match_DupAlignedMemoryRequiresNone: - return Error(ErrorLoc, "alignment must be omitted"); - case Match_AlignedMemoryRequires16: - case Match_DupAlignedMemoryRequires16: - return Error(ErrorLoc, "alignment must be 16 or omitted"); - case Match_AlignedMemoryRequires32: - case Match_DupAlignedMemoryRequires32: - return Error(ErrorLoc, "alignment must be 32 or omitted"); - case Match_AlignedMemoryRequires64: - case Match_DupAlignedMemoryRequires64: - return Error(ErrorLoc, "alignment must be 64 or omitted"); - case Match_AlignedMemoryRequires64or128: - case Match_DupAlignedMemoryRequires64or128: - return Error(ErrorLoc, "alignment must be 64, 128 
or omitted"); - case Match_AlignedMemoryRequires64or128or256: - return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted"); - } - } } llvm_unreachable("Implement any new match types added!"); @@ -9485,9 +9373,9 @@ void ARMAsmParser::FixModeAfterArchChange(bool WasThumb, SMLoc Loc) { /// ::= .arch token bool ARMAsmParser::parseDirectiveArch(SMLoc L) { StringRef Arch = getParser().parseStringToEndOfStatement().trim(); - unsigned ID = ARM::parseArch(Arch); + ARM::ArchKind ID = ARM::parseArch(Arch); - if (ID == ARM::AK_INVALID) + if (ID == ARM::ArchKind::INVALID) return Error(L, "Unknown arch name"); bool WasThumb = isThumb(); @@ -9610,6 +9498,7 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) { return false; } + /// parseDirectiveFPU /// ::= .fpu str bool ARMAsmParser::parseDirectiveFPU(SMLoc L) { @@ -10135,9 +10024,9 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) { SMLoc ArchLoc = Parser.getTok().getLoc(); Lex(); - unsigned ID = ARM::parseArch(Arch); + ARM::ArchKind ID = ARM::parseArch(Arch); - if (ID == ARM::AK_INVALID) + if (ID == ARM::ArchKind::INVALID) return Error(ArchLoc, "unknown architecture '" + Arch + "'"); if (parseToken(AsmToken::EndOfStatement)) return true; @@ -10196,8 +10085,213 @@ extern "C" void LLVMInitializeARMAsmParser() { #define GET_REGISTER_MATCHER #define GET_SUBTARGET_FEATURE_NAME #define GET_MATCHER_IMPLEMENTATION +#define GET_MNEMONIC_SPELL_CHECKER #include "ARMGenAsmMatcher.inc" +// Some diagnostics need to vary with subtarget features, so they are handled +// here. For example, the DPR class has either 16 or 32 registers, depending +// on the FPU available. +const char * +ARMAsmParser::getCustomOperandDiag(ARMMatchResultTy MatchError) { + switch (MatchError) { + // rGPR contains sp starting with ARMv8. + case Match_rGPR: + return hasV8Ops() ? "operand must be a register in range [r0, r14]" + : "operand must be a register in range [r0, r12] or r14"; + // DPR contains 16 registers for some FPUs, and 32 for others. + case Match_DPR: + return hasD16() ? "operand must be a register in range [d0, d15]" + : "operand must be a register in range [d0, d31]"; + case Match_DPR_RegList: + return hasD16() ? "operand must be a list of registers in range [d0, d15]" + : "operand must be a list of registers in range [d0, d31]"; + + // For all other diags, use the static string from tablegen. + default: + return getMatchKindDiag(MatchError); + } +} + +// Process the list of near-misses, throwing away ones we don't want to report +// to the user, and converting the rest to a source location and string that +// should be reported. +void +ARMAsmParser::FilterNearMisses(SmallVectorImpl<NearMissInfo> &NearMissesIn, + SmallVectorImpl<NearMissMessage> &NearMissesOut, + SMLoc IDLoc, OperandVector &Operands) { + // TODO: If operand didn't match, sub in a dummy one and run target + // predicate, so that we can avoid reporting near-misses that are invalid? + // TODO: Many operand types dont have SuperClasses set, so we report + // redundant ones. + // TODO: Some operands are superclasses of registers (e.g. + // MCK_RegShiftedImm), we don't have any way to represent that currently. + // TODO: This is not all ARM-specific, can some of it be factored out? + + // Record some information about near-misses that we have already seen, so + // that we can avoid reporting redundant ones. For example, if there are + // variants of an instruction that take 8- and 16-bit immediates, we want + // to only report the widest one. 
+ std::multimap<unsigned, unsigned> OperandMissesSeen; + SmallSet<uint64_t, 4> FeatureMissesSeen; + bool ReportedTooFewOperands = false; + + // Process the near-misses in reverse order, so that we see more general ones + // first, and so can avoid emitting more specific ones. + for (NearMissInfo &I : reverse(NearMissesIn)) { + switch (I.getKind()) { + case NearMissInfo::NearMissOperand: { + SMLoc OperandLoc = + ((ARMOperand &)*Operands[I.getOperandIndex()]).getStartLoc(); + const char *OperandDiag = + getCustomOperandDiag((ARMMatchResultTy)I.getOperandError()); + + // If we have already emitted a message for a superclass, don't also report + // the sub-class. We consider all operand classes that we don't have a + // specialised diagnostic for to be equal for the propose of this check, + // so that we don't report the generic error multiple times on the same + // operand. + unsigned DupCheckMatchClass = OperandDiag ? I.getOperandClass() : ~0U; + auto PrevReports = OperandMissesSeen.equal_range(I.getOperandIndex()); + if (std::any_of(PrevReports.first, PrevReports.second, + [DupCheckMatchClass]( + const std::pair<unsigned, unsigned> Pair) { + if (DupCheckMatchClass == ~0U || Pair.second == ~0U) + return Pair.second == DupCheckMatchClass; + else + return isSubclass((MatchClassKind)DupCheckMatchClass, + (MatchClassKind)Pair.second); + })) + break; + OperandMissesSeen.insert( + std::make_pair(I.getOperandIndex(), DupCheckMatchClass)); + + NearMissMessage Message; + Message.Loc = OperandLoc; + if (OperandDiag) { + Message.Message = OperandDiag; + } else if (I.getOperandClass() == InvalidMatchClass) { + Message.Message = "too many operands for instruction"; + } else { + Message.Message = "invalid operand for instruction"; + DEBUG(dbgs() << "Missing diagnostic string for operand class " << + getMatchClassName((MatchClassKind)I.getOperandClass()) + << I.getOperandClass() << ", error " << I.getOperandError() + << ", opcode " << MII.getName(I.getOpcode()) << "\n"); + } + NearMissesOut.emplace_back(Message); + break; + } + case NearMissInfo::NearMissFeature: { + uint64_t MissingFeatures = I.getFeatures(); + // Don't report the same set of features twice. + if (FeatureMissesSeen.count(MissingFeatures)) + break; + FeatureMissesSeen.insert(MissingFeatures); + + // Special case: don't report a feature set which includes arm-mode for + // targets that don't have ARM mode. + if ((MissingFeatures & Feature_IsARM) && !hasARM()) + break; + // Don't report any near-misses that both require switching instruction + // set, and adding other subtarget features. 
+ if (isThumb() && (MissingFeatures & Feature_IsARM) && + (MissingFeatures & ~Feature_IsARM)) + break; + if (!isThumb() && (MissingFeatures & Feature_IsThumb) && + (MissingFeatures & ~Feature_IsThumb)) + break; + if (!isThumb() && (MissingFeatures & Feature_IsThumb2) && + (MissingFeatures & ~(Feature_IsThumb2 | Feature_IsThumb))) + break; + + NearMissMessage Message; + Message.Loc = IDLoc; + raw_svector_ostream OS(Message.Message); + + OS << "instruction requires:"; + uint64_t Mask = 1; + for (unsigned MaskPos = 0; MaskPos < (sizeof(MissingFeatures) * 8 - 1); + ++MaskPos) { + if (MissingFeatures & Mask) { + OS << " " << getSubtargetFeatureName(MissingFeatures & Mask); + } + Mask <<= 1; + } + NearMissesOut.emplace_back(Message); + + break; + } + case NearMissInfo::NearMissPredicate: { + NearMissMessage Message; + Message.Loc = IDLoc; + switch (I.getPredicateError()) { + case Match_RequiresNotITBlock: + Message.Message = "flag setting instruction only valid outside IT block"; + break; + case Match_RequiresITBlock: + Message.Message = "instruction only valid inside IT block"; + break; + case Match_RequiresV6: + Message.Message = "instruction variant requires ARMv6 or later"; + break; + case Match_RequiresThumb2: + Message.Message = "instruction variant requires Thumb2"; + break; + case Match_RequiresV8: + Message.Message = "instruction variant requires ARMv8 or later"; + break; + case Match_RequiresFlagSetting: + Message.Message = "no flag-preserving variant of this instruction available"; + break; + case Match_InvalidOperand: + Message.Message = "invalid operand for instruction"; + break; + default: + llvm_unreachable("Unhandled target predicate error"); + break; + } + NearMissesOut.emplace_back(Message); + break; + } + case NearMissInfo::NearMissTooFewOperands: { + if (!ReportedTooFewOperands) { + SMLoc EndLoc = ((ARMOperand &)*Operands.back()).getEndLoc(); + NearMissesOut.emplace_back(NearMissMessage{ + EndLoc, StringRef("too few operands for instruction")}); + ReportedTooFewOperands = true; + } + break; + } + case NearMissInfo::NoNearMiss: + // This should never leave the matcher. + llvm_unreachable("not a near-miss"); + break; + } + } +} + +void ARMAsmParser::ReportNearMisses(SmallVectorImpl<NearMissInfo> &NearMisses, + SMLoc IDLoc, OperandVector &Operands) { + SmallVector<NearMissMessage, 4> Messages; + FilterNearMisses(NearMisses, Messages, IDLoc, Operands); + + if (Messages.size() == 0) { + // No near-misses were found, so the best we can do is "invalid + // instruction". + Error(IDLoc, "invalid instruction"); + } else if (Messages.size() == 1) { + // One near miss was found, report it as the sole error. + Error(Messages[0].Loc, Messages[0].Message); + } else { + // More than one near miss, so report a generic "invalid instruction" + // error, followed by notes for each of the near-misses. + Error(IDLoc, "invalid instruction, any one of the following would fix this:"); + for (auto &M : Messages) { + Note(M.Loc, M.Message); + } + } +} + // FIXME: This structure should be moved inside ARMTargetParser // when we start to table-generate them, and we can use the ARM // flags below, that were generated by table-gen. 
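To make the reporting behaviour above concrete: when no near-miss survives filtering, the parser falls back to a bare "invalid instruction" error; a single survivor is reported directly; and multiple survivors produce the combined form sketched below (the locations and the particular note set are hypothetical, but the message strings are the ones constructed above):

    error: invalid instruction, any one of the following would fix this:
    note: instruction requires: thumb2
    note: operand must be a register in range [r0, r12] or r14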
@@ -10300,14 +10394,15 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, int64_t Value; if (!SOExpr->evaluateAsAbsolute(Value)) return Match_Success; - assert((Value >= INT32_MIN && Value <= UINT32_MAX) && + assert((Value >= std::numeric_limits<int32_t>::min() && + Value <= std::numeric_limits<uint32_t>::max()) && "expression value must be representable in 32 bits"); } break; case MCK_rGPR: if (hasV8Ops() && Op.isReg() && Op.getReg() == ARM::SP) return Match_Success; - break; + return Match_rGPR; case MCK_GPRPair: if (Op.isReg() && MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg())) diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt index f0184b675dacb..061885ad4fac8 100644 --- a/lib/Target/ARM/AsmParser/LLVMBuild.txt +++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMAsmParser parent = ARM -required_libraries = ARMDesc ARMInfo MC MCParser Support +required_libraries = ARMDesc ARMInfo MC MCParser Support ARMUtils add_to_library_groups = ARM diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index cf6827fd6ca19..014ac2ae8b489 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -1,9 +1,7 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) -if(LLVM_BUILD_GLOBAL_ISEL) - tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank) - tablegen(LLVM ARMGenGlobalISel.inc -gen-global-isel) -endif() +tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank) +tablegen(LLVM ARMGenGlobalISel.inc -gen-global-isel) tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info) tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter) @@ -15,43 +13,33 @@ tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel) tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv) tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget) tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables) add_public_tablegen_target(ARMCommonTableGen) -# Add GlobalISel files if the user wants to build it. 
-set(GLOBAL_ISEL_FILES - ARMCallLowering.cpp - ARMInstructionSelector.cpp - ARMLegalizerInfo.cpp - ARMRegisterBankInfo.cpp - ) - -if(LLVM_BUILD_GLOBAL_ISEL) - set(GLOBAL_ISEL_BUILD_FILES ${GLOBAL_ISEL_FILES}) -else() - set(GLOBAL_ISEL_BUILD_FILES "") - set(LLVM_OPTIONAL_SOURCES LLVMGlobalISel ${GLOBAL_ISEL_FILES}) -endif() - add_llvm_target(ARMCodeGen A15SDOptimizer.cpp ARMAsmPrinter.cpp ARMBaseInstrInfo.cpp ARMBaseRegisterInfo.cpp + ARMCallLowering.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp ARMExpandPseudoInsts.cpp ARMFastISel.cpp ARMFrameLowering.cpp ARMHazardRecognizer.cpp + ARMInstructionSelector.cpp ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp + ARMLegalizerInfo.cpp ARMLoadStoreOptimizer.cpp ARMMCInstLower.cpp ARMMachineFunctionInfo.cpp ARMMacroFusion.cpp ARMRegisterInfo.cpp ARMOptimizeBarriersPass.cpp + ARMRegisterBankInfo.cpp ARMSelectionDAGInfo.cpp ARMSubtarget.cpp ARMTargetMachine.cpp @@ -65,7 +53,6 @@ add_llvm_target(ARMCodeGen Thumb2InstrInfo.cpp Thumb2SizeReduction.cpp ARMComputeBlockSize.cpp - ${GLOBAL_ISEL_BUILD_FILES} ) add_subdirectory(TargetInfo) @@ -73,3 +60,4 @@ add_subdirectory(AsmParser) add_subdirectory(Disassembler) add_subdirectory(InstPrinter) add_subdirectory(MCTargetDesc) +add_subdirectory(Utils) diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 5ab236b7fd4c0..a29a2eeccfe87 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1,4 +1,4 @@ -//===-- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA --------------===// +//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA ---------------===// // // The LLVM Compiler Infrastructure // @@ -10,6 +10,7 @@ #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "Utils/ARMBaseInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCFixedLenDisassembler.h" @@ -31,7 +32,7 @@ using namespace llvm; #define DEBUG_TYPE "arm-disassembler" -typedef MCDisassembler::DecodeStatus DecodeStatus; +using DecodeStatus = MCDisassembler::DecodeStatus; namespace { @@ -117,6 +118,7 @@ public: private: mutable ITStatus ITBlock; + DecodeStatus AddThumbPredicate(MCInst&) const; void UpdateThumbVFPPredicate(MCInst&) const; }; @@ -320,6 +322,10 @@ static DecodeStatus DecodeVCVTD(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, + unsigned Val, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeThumbAddSpecialReg(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); @@ -398,6 +404,8 @@ static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); #include "ARMGenDisassemblerTables.inc" @@ -486,6 +494,13 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } + Result = + decodeInstruction(DecoderTableCoProc32, MI, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return checkDecodedInstruction(MI, Size, Address, OS, CS, Insn, 
Result); + } + Size = 4; return MCDisassembler::Fail; } @@ -821,6 +836,14 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } + Result = + decodeInstruction(DecoderTableThumb2CoProc32, MI, Insn32, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + Check(Result, AddThumbPredicate(MI)); + return Result; + } + Size = 0; return MCDisassembler::Fail; } @@ -2744,7 +2767,6 @@ static DecodeStatus DecodeVSTInstruction(MCInst &Inst, unsigned Insn, break; } - // First input register switch (Inst.getOpcode()) { case ARM::VST1q16: @@ -3843,7 +3865,6 @@ static DecodeStatus DecodeT2AddrModeImm12(MCInst &Inst, unsigned Val, return S; } - static DecodeStatus DecodeThumbAddSPImm(MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder) { unsigned imm = fieldFromInstruction(Insn, 0, 7); @@ -4167,7 +4188,6 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val, static DecodeStatus DecodeBankedReg(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - unsigned R = fieldFromInstruction(Val, 5, 1); unsigned SysM = fieldFromInstruction(Val, 0, 5); @@ -5199,6 +5219,39 @@ static DecodeStatus DecodeVCVTQ(MCInst &Inst, unsigned Insn, return S; } +static DecodeStatus DecodeNEONComplexLane64Instruction(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Vd = (fieldFromInstruction(Insn, 12, 4) << 0); + Vd |= (fieldFromInstruction(Insn, 22, 1) << 4); + unsigned Vn = (fieldFromInstruction(Insn, 16, 4) << 0); + Vn |= (fieldFromInstruction(Insn, 7, 1) << 4); + unsigned Vm = (fieldFromInstruction(Insn, 0, 4) << 0); + Vm |= (fieldFromInstruction(Insn, 5, 1) << 4); + unsigned q = (fieldFromInstruction(Insn, 6, 1) << 0); + unsigned rotate = (fieldFromInstruction(Insn, 20, 2) << 0); + + DecodeStatus S = MCDisassembler::Success; + + auto DestRegDecoder = q ? DecodeQPRRegisterClass : DecodeDPRRegisterClass; + + if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DestRegDecoder(Inst, Vd, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DestRegDecoder(Inst, Vn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeDPRRegisterClass(Inst, Vm, Address, Decoder))) + return MCDisassembler::Fail; + // The lane index does not have any bits in the encoding, because it can only + // be 0. 
+ Inst.addOperand(MCOperand::createImm(0)); + Inst.addOperand(MCOperand::createImm(rotate)); + + return S; +} + static DecodeStatus DecodeLDR(MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { DecodeStatus S = MCDisassembler::Success; @@ -5270,3 +5323,31 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(MCInst &Inst, unsigned Val, return S; } + +static DecodeStatus DecodeForVMRSandVMSR(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + const FeatureBitset &featureBits = + ((const MCDisassembler *)Decoder)->getSubtargetInfo().getFeatureBits(); + DecodeStatus S = MCDisassembler::Success; + + unsigned Rt = fieldFromInstruction(Val, 12, 4); + + if (featureBits[ARM::ModeThumb] && !featureBits[ARM::HasV8Ops]) { + if (Rt == 13 || Rt == 15) + S = MCDisassembler::SoftFail; + Check(S, DecodeGPRRegisterClass(Inst, Rt, Address, Decoder)); + } else + Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder)); + + if (featureBits[ARM::ModeThumb]) { + Inst.addOperand(MCOperand::createImm(ARMCC::AL)); + Inst.addOperand(MCOperand::createReg(0)); + } else { + unsigned pred = fieldFromInstruction(Val, 28, 4); + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + } + + return S; +} diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 57b91366a0858..4fc67a4f6eb5e 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -12,6 +12,9 @@ //===----------------------------------------------------------------------===// #include "ARMInstPrinter.h" +#include "Utils/ARMBaseInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMBaseRegisterInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/MC/MCAsmInfo.h" @@ -789,152 +792,48 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNum); - unsigned SpecRegRBit = Op.getImm() >> 4; - unsigned Mask = Op.getImm() & 0xf; const FeatureBitset &FeatureBits = STI.getFeatureBits(); - if (FeatureBits[ARM::FeatureMClass]) { - unsigned SYSm = Op.getImm(); + + unsigned SYSm = Op.getImm() & 0xFFF; // 12-bit SYSm unsigned Opcode = MI->getOpcode(); // For writes, handle extended mask bits if the DSP extension is present. if (Opcode == ARM::t2MSR_M && FeatureBits[ARM::FeatureDSP]) { - switch (SYSm) { - case 0x400: - O << "apsr_g"; - return; - case 0xc00: - O << "apsr_nzcvqg"; - return; - case 0x401: - O << "iapsr_g"; - return; - case 0xc01: - O << "iapsr_nzcvqg"; - return; - case 0x402: - O << "eapsr_g"; - return; - case 0xc02: - O << "eapsr_nzcvqg"; - return; - case 0x403: - O << "xpsr_g"; - return; - case 0xc03: - O << "xpsr_nzcvqg"; - return; + auto TheReg =ARMSysReg::lookupMClassSysRegBy12bitSYSmValue(SYSm); + if (TheReg && TheReg->isInRequiredFeatures({ARM::FeatureDSP})) { + O << TheReg->Name; + return; } } // Handle the basic 8-bit mask. SYSm &= 0xff; - if (Opcode == ARM::t2MSR_M && FeatureBits [ARM::HasV7Ops]) { // ARMv7-M deprecates using MSR APSR without a _<bits> qualifier as an // alias for MSR APSR_nzcvq. 
- switch (SYSm) { - case 0: - O << "apsr_nzcvq"; - return; - case 1: - O << "iapsr_nzcvq"; - return; - case 2: - O << "eapsr_nzcvq"; - return; - case 3: - O << "xpsr_nzcvq"; - return; + auto TheReg = ARMSysReg::lookupMClassSysRegAPSRNonDeprecated(SYSm); + if (TheReg) { + O << TheReg->Name; + return; } } - switch (SYSm) { - default: - llvm_unreachable("Unexpected mask value!"); - case 0: - O << "apsr"; - return; - case 1: - O << "iapsr"; - return; - case 2: - O << "eapsr"; - return; - case 3: - O << "xpsr"; - return; - case 5: - O << "ipsr"; - return; - case 6: - O << "epsr"; - return; - case 7: - O << "iepsr"; - return; - case 8: - O << "msp"; - return; - case 9: - O << "psp"; - return; - case 16: - O << "primask"; - return; - case 17: - O << "basepri"; - return; - case 18: - O << "basepri_max"; - return; - case 19: - O << "faultmask"; - return; - case 20: - O << "control"; - return; - case 10: - O << "msplim"; - return; - case 11: - O << "psplim"; - return; - case 0x88: - O << "msp_ns"; - return; - case 0x89: - O << "psp_ns"; - return; - case 0x8a: - O << "msplim_ns"; - return; - case 0x8b: - O << "psplim_ns"; - return; - case 0x90: - O << "primask_ns"; - return; - case 0x91: - O << "basepri_ns"; - return; - case 0x92: - O << "basepri_max_ns"; - return; - case 0x93: - O << "faultmask_ns"; - return; - case 0x94: - O << "control_ns"; - return; - case 0x98: - O << "sp_ns"; + auto TheReg = ARMSysReg::lookupMClassSysRegBy8bitSYSmValue(SYSm); + if (TheReg) { + O << TheReg->Name; return; } + + llvm_unreachable("Unexpected mask value!"); + return; } // As special cases, CPSR_f, CPSR_s and CPSR_fs prefer printing as // APSR_nzcvq, APSR_g and APSRnzcvqg, respectively. + unsigned SpecRegRBit = Op.getImm() >> 4; + unsigned Mask = Op.getImm() & 0xf; + if (!SpecRegRBit && (Mask == 8 || Mask == 4 || Mask == 12)) { O << "APSR_"; switch (Mask) { @@ -974,51 +873,13 @@ void ARMInstPrinter::printBankedRegOperand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { uint32_t Banked = MI->getOperand(OpNum).getImm(); - uint32_t R = (Banked & 0x20) >> 5; - uint32_t SysM = Banked & 0x1f; - - // Nothing much we can do about this, the encodings are specified in B9.2.3 of - // the ARM ARM v7C, and are all over the shop. 
- if (R) { - O << "SPSR_"; - - switch (SysM) { - case 0x0e: - O << "fiq"; - return; - case 0x10: - O << "irq"; - return; - case 0x12: - O << "svc"; - return; - case 0x14: - O << "abt"; - return; - case 0x16: - O << "und"; - return; - case 0x1c: - O << "mon"; - return; - case 0x1e: - O << "hyp"; - return; - default: - llvm_unreachable("Invalid banked SPSR register"); - } - } - - assert(!R && "should have dealt with SPSR regs"); - const char *RegNames[] = { - "r8_usr", "r9_usr", "r10_usr", "r11_usr", "r12_usr", "sp_usr", "lr_usr", - "", "r8_fiq", "r9_fiq", "r10_fiq", "r11_fiq", "r12_fiq", "sp_fiq", - "lr_fiq", "", "lr_irq", "sp_irq", "lr_svc", "sp_svc", "lr_abt", - "sp_abt", "lr_und", "sp_und", "", "", "", "", - "lr_mon", "sp_mon", "elr_hyp", "sp_hyp"}; - const char *Name = RegNames[SysM]; - assert(Name[0] && "invalid banked register operand"); + auto TheReg = ARMBankedReg::lookupBankedRegByEncoding(Banked); + assert(TheReg && "invalid banked register operand"); + std::string Name = TheReg->Name; + uint32_t isSPSR = (Banked & 0x20) >> 5; + if (isSPSR) + Name.replace(0, 4, "SPSR"); // convert 'spsr_' to 'SPSR_' O << Name; } @@ -1674,3 +1535,12 @@ void ARMInstPrinter::printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, printRegName(O, MI->getOperand(OpNum).getReg() + 6); O << "}"; } + +template<int64_t Angle, int64_t Remainder> +void ARMInstPrinter::printComplexRotationOp(const MCInst *MI, unsigned OpNo, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Val = MI->getOperand(OpNo).getImm(); + O << "#" << (Val * Angle) + Remainder; +} + diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 86873a3a6ccbb..7dc311229cca8 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -231,6 +231,9 @@ public: const MCSubtargetInfo &STI, raw_ostream &O); void printVectorListFourSpaced(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template<int64_t Angle, int64_t Remainder> + void printComplexRotationOp(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); }; } // end namespace llvm diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt index 6f4fa365358c4..75d700715a526 100644 --- a/lib/Target/ARM/InstPrinter/LLVMBuild.txt +++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = ARMAsmPrinter parent = ARM -required_libraries = MC Support +required_libraries = MC Support ARMUtils add_to_library_groups = ARM diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt index 80d39610574f9..a450acc5e13ac 100644 --- a/lib/Target/ARM/LLVMBuild.txt +++ b/lib/Target/ARM/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo +subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo Utils [component_0] type = TargetGroup @@ -31,5 +31,5 @@ has_jit = 1 type = Library name = ARMCodeGen parent = ARM -required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target GlobalISel +required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target GlobalISel ARMUtils add_to_library_groups = ARM diff --git a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h 
b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h index 3959eab966a84..f472b21543143 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h @@ -38,11 +38,9 @@ namespace ARM_AM { add }; - static inline const char *getAddrOpcStr(AddrOpc Op) { - return Op == sub ? "-" : ""; - } + inline const char *getAddrOpcStr(AddrOpc Op) { return Op == sub ? "-" : ""; } - static inline const char *getShiftOpcStr(ShiftOpc Op) { + inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return "asr"; @@ -53,7 +51,7 @@ namespace ARM_AM { } } - static inline unsigned getShiftOpcEncoding(ShiftOpc Op) { + inline unsigned getShiftOpcEncoding(ShiftOpc Op) { switch (Op) { default: llvm_unreachable("Unknown shift opc!"); case ARM_AM::asr: return 2; @@ -71,7 +69,7 @@ namespace ARM_AM { db }; - static inline const char *getAMSubModeStr(AMSubMode Mode) { + inline const char *getAMSubModeStr(AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unknown addressing sub-mode!"); case ARM_AM::ia: return "ia"; @@ -83,14 +81,14 @@ namespace ARM_AM { /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits. /// - static inline unsigned rotr32(unsigned Val, unsigned Amt) { + inline unsigned rotr32(unsigned Val, unsigned Amt) { assert(Amt < 32 && "Invalid rotate amount"); return (Val >> Amt) | (Val << ((32-Amt)&31)); } /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits. /// - static inline unsigned rotl32(unsigned Val, unsigned Amt) { + inline unsigned rotl32(unsigned Val, unsigned Amt) { assert(Amt < 32 && "Invalid rotate amount"); return (Val << Amt) | (Val >> ((32-Amt)&31)); } @@ -109,32 +107,24 @@ namespace ARM_AM { // reg, the second is the shift amount (or reg0 if not present or imm). The // third operand encodes the shift opcode and the imm if a reg isn't present. // - static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) { + inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) { return ShOp | (Imm << 3); } - static inline unsigned getSORegOffset(unsigned Op) { - return Op >> 3; - } - static inline ShiftOpc getSORegShOp(unsigned Op) { - return (ShiftOpc)(Op & 7); - } + inline unsigned getSORegOffset(unsigned Op) { return Op >> 3; } + inline ShiftOpc getSORegShOp(unsigned Op) { return (ShiftOpc)(Op & 7); } /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return /// the 8-bit imm value. - static inline unsigned getSOImmValImm(unsigned Imm) { - return Imm & 0xFF; - } + inline unsigned getSOImmValImm(unsigned Imm) { return Imm & 0xFF; } /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return /// the rotate amount. - static inline unsigned getSOImmValRot(unsigned Imm) { - return (Imm >> 8) * 2; - } + inline unsigned getSOImmValRot(unsigned Imm) { return (Imm >> 8) * 2; } /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand, /// computing the rotate amount to use. If this immediate value cannot be /// handled with a single shifter-op, determine a good rotate amount that will /// take a maximal chunk of bits out of the immediate. - static inline unsigned getSOImmValRotate(unsigned Imm) { + inline unsigned getSOImmValRotate(unsigned Imm) { // 8-bit (or less) immediates are trivially shifter_operands with a rotate // of zero. 
if ((Imm & ~255U) == 0) return 0; @@ -168,7 +158,7 @@ namespace ARM_AM { /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit /// into an shifter_operand immediate operand, return the 12-bit encoding for /// it. If not, return -1. - static inline int getSOImmVal(unsigned Arg) { + inline int getSOImmVal(unsigned Arg) { // 8-bit (or less) immediates are trivially shifter_operands with a rotate // of zero. if ((Arg & ~255U) == 0) return Arg; @@ -185,7 +175,7 @@ namespace ARM_AM { /// isSOImmTwoPartVal - Return true if the specified value can be obtained by /// or'ing together two SOImmVal's. - static inline bool isSOImmTwoPartVal(unsigned V) { + inline bool isSOImmTwoPartVal(unsigned V) { // If this can be handled with a single shifter_op, bail out. V = rotr32(~255U, getSOImmValRotate(V)) & V; if (V == 0) @@ -198,13 +188,13 @@ namespace ARM_AM { /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal, /// return the first chunk of it. - static inline unsigned getSOImmTwoPartFirst(unsigned V) { + inline unsigned getSOImmTwoPartFirst(unsigned V) { return rotr32(255U, getSOImmValRotate(V)) & V; } /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal, /// return the second chunk of it. - static inline unsigned getSOImmTwoPartSecond(unsigned V) { + inline unsigned getSOImmTwoPartSecond(unsigned V) { // Mask out the first hunk. V = rotr32(~255U, getSOImmValRotate(V)) & V; @@ -215,7 +205,7 @@ namespace ARM_AM { /// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed /// by a left shift. Returns the shift amount to use. - static inline unsigned getThumbImmValShift(unsigned Imm) { + inline unsigned getThumbImmValShift(unsigned Imm) { // 8-bit (or less) immediates are trivially immediate operand with a shift // of zero. if ((Imm & ~255U) == 0) return 0; @@ -226,7 +216,7 @@ namespace ARM_AM { /// isThumbImmShiftedVal - Return true if the specified value can be obtained /// by left shifting a 8-bit immediate. - static inline bool isThumbImmShiftedVal(unsigned V) { + inline bool isThumbImmShiftedVal(unsigned V) { // If this can be handled with V = (~255U << getThumbImmValShift(V)) & V; return V == 0; @@ -234,7 +224,7 @@ namespace ARM_AM { /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed /// by a left shift. Returns the shift amount to use. - static inline unsigned getThumbImm16ValShift(unsigned Imm) { + inline unsigned getThumbImm16ValShift(unsigned Imm) { // 16-bit (or less) immediates are trivially immediate operand with a shift // of zero. if ((Imm & ~65535U) == 0) return 0; @@ -245,7 +235,7 @@ namespace ARM_AM { /// isThumbImm16ShiftedVal - Return true if the specified value can be /// obtained by left shifting a 16-bit immediate. - static inline bool isThumbImm16ShiftedVal(unsigned V) { + inline bool isThumbImm16ShiftedVal(unsigned V) { // If this can be handled with V = (~65535U << getThumbImm16ValShift(V)) & V; return V == 0; @@ -253,7 +243,7 @@ namespace ARM_AM { /// getThumbImmNonShiftedVal - If V is a value that satisfies /// isThumbImmShiftedVal, return the non-shiftd value. - static inline unsigned getThumbImmNonShiftedVal(unsigned V) { + inline unsigned getThumbImmNonShiftedVal(unsigned V) { return V >> getThumbImmValShift(V); } @@ -267,7 +257,7 @@ namespace ARM_AM { /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3 /// Return -1 if none of the above apply. /// See ARM Reference Manual A6.3.2. 
- static inline int getT2SOImmValSplatVal(unsigned V) { + inline int getT2SOImmValSplatVal(unsigned V) { unsigned u, Vs, Imm; // control = 0 if ((V & 0xffffff00) == 0) @@ -295,7 +285,7 @@ namespace ARM_AM { /// specified value is a rotated 8-bit value. Return -1 if no rotation /// encoding is possible. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmValRotateVal(unsigned V) { + inline int getT2SOImmValRotateVal(unsigned V) { unsigned RotAmt = countLeadingZeros(V); if (RotAmt >= 24) return -1; @@ -311,7 +301,7 @@ namespace ARM_AM { /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit /// encoding for it. If not, return -1. /// See ARM Reference Manual A6.3.2. - static inline int getT2SOImmVal(unsigned Arg) { + inline int getT2SOImmVal(unsigned Arg) { // If 'Arg' is an 8-bit splat, then get the encoded value. int Splat = getT2SOImmValSplatVal(Arg); if (Splat != -1) @@ -325,14 +315,14 @@ namespace ARM_AM { return -1; } - static inline unsigned getT2SOImmValRotate(unsigned V) { + inline unsigned getT2SOImmValRotate(unsigned V) { if ((V & ~255U) == 0) return 0; // Use CTZ to compute the rotate amount. unsigned RotAmt = countTrailingZeros(V); return (32 - RotAmt) & 31; } - static inline bool isT2SOImmTwoPartVal (unsigned Imm) { + inline bool isT2SOImmTwoPartVal(unsigned Imm) { unsigned V = Imm; // Passing values can be any combination of splat values and shifter // values. If this can be handled with a single shifter or splat, bail @@ -359,7 +349,7 @@ namespace ARM_AM { return false; } - static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) { + inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) { assert (isT2SOImmTwoPartVal(Imm) && "Immedate cannot be encoded as two part immediate!"); // Try a shifter operand as one part @@ -376,7 +366,7 @@ namespace ARM_AM { return Imm & 0x00ff00ffU; } - static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) { + inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) { // Mask out the first hunk Imm ^= getT2SOImmTwoPartFirst(Imm); // Return what's left @@ -404,25 +394,22 @@ namespace ARM_AM { // and code rewriting), this operand will have the form: FI#, reg0, <offs> // with no shift amount for the frame offset. // - static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, - unsigned IdxMode = 0) { + inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, + unsigned IdxMode = 0) { assert(Imm12 < (1 << 12) && "Imm too large!"); bool isSub = Opc == sub; return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ; } - static inline unsigned getAM2Offset(unsigned AM2Opc) { + inline unsigned getAM2Offset(unsigned AM2Opc) { return AM2Opc & ((1 << 12)-1); } - static inline AddrOpc getAM2Op(unsigned AM2Opc) { + inline AddrOpc getAM2Op(unsigned AM2Opc) { return ((AM2Opc >> 12) & 1) ? sub : add; } - static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { + inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { return (ShiftOpc)((AM2Opc >> 13) & 7); } - static inline unsigned getAM2IdxMode(unsigned AM2Opc) { - return (AM2Opc >> 16); - } - + inline unsigned getAM2IdxMode(unsigned AM2Opc) { return (AM2Opc >> 16); } //===--------------------------------------------------------------------===// // Addressing Mode #3 @@ -439,20 +426,16 @@ namespace ARM_AM { // index mode. /// getAM3Opc - This function encodes the addrmode3 opc field. 
- static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, - unsigned IdxMode = 0) { + inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, + unsigned IdxMode = 0) { bool isSub = Opc == sub; return ((int)isSub << 8) | Offset | (IdxMode << 9); } - static inline unsigned char getAM3Offset(unsigned AM3Opc) { - return AM3Opc & 0xFF; - } - static inline AddrOpc getAM3Op(unsigned AM3Opc) { + inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; } + inline AddrOpc getAM3Op(unsigned AM3Opc) { return ((AM3Opc >> 8) & 1) ? sub : add; } - static inline unsigned getAM3IdxMode(unsigned AM3Opc) { - return (AM3Opc >> 9); - } + inline unsigned getAM3IdxMode(unsigned AM3Opc) { return (AM3Opc >> 9); } //===--------------------------------------------------------------------===// // Addressing Mode #4 @@ -469,13 +452,11 @@ namespace ARM_AM { // DB - Decrement before // For VFP instructions, only the IA and DB modes are valid. - static inline AMSubMode getAM4SubMode(unsigned Mode) { + inline AMSubMode getAM4SubMode(unsigned Mode) { return (AMSubMode)(Mode & 0x7); } - static inline unsigned getAM4ModeImm(AMSubMode SubMode) { - return (int)SubMode; - } + inline unsigned getAM4ModeImm(AMSubMode SubMode) { return (int)SubMode; } //===--------------------------------------------------------------------===// // Addressing Mode #5 @@ -489,14 +470,12 @@ namespace ARM_AM { // operation (add or subtract) in bit 8 and the immediate in bits 0-7. /// getAM5Opc - This function encodes the addrmode5 opc field. - static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { + inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) { bool isSub = Opc == sub; return ((int)isSub << 8) | Offset; } - static inline unsigned char getAM5Offset(unsigned AM5Opc) { - return AM5Opc & 0xFF; - } - static inline AddrOpc getAM5Op(unsigned AM5Opc) { + inline unsigned char getAM5Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; } + inline AddrOpc getAM5Op(unsigned AM5Opc) { return ((AM5Opc >> 8) & 1) ? sub : add; } @@ -512,14 +491,14 @@ namespace ARM_AM { // operation (add or subtract) in bit 8 and the immediate in bits 0-7. /// getAM5FP16Opc - This function encodes the addrmode5fp16 opc field. - static inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) { + inline unsigned getAM5FP16Opc(AddrOpc Opc, unsigned char Offset) { bool isSub = Opc == sub; return ((int)isSub << 8) | Offset; } - static inline unsigned char getAM5FP16Offset(unsigned AM5Opc) { + inline unsigned char getAM5FP16Offset(unsigned AM5Opc) { return AM5Opc & 0xFF; } - static inline AddrOpc getAM5FP16Op(unsigned AM5Opc) { + inline AddrOpc getAM5FP16Op(unsigned AM5Opc) { return ((AM5Opc >> 8) & 1) ? sub : add; } @@ -548,20 +527,18 @@ namespace ARM_AM { // the "Cmode" field of the instruction. The interfaces below treat the // Op and Cmode values as a single 5-bit value. - static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) { + inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) { return (OpCmode << 8) | Val; } - static inline unsigned getNEONModImmOpCmode(unsigned ModImm) { + inline unsigned getNEONModImmOpCmode(unsigned ModImm) { return (ModImm >> 8) & 0x1f; } - static inline unsigned getNEONModImmVal(unsigned ModImm) { - return ModImm & 0xff; - } + inline unsigned getNEONModImmVal(unsigned ModImm) { return ModImm & 0xff; } /// decodeNEONModImm - Decode a NEON modified immediate value into the /// element value and the element size in bits. 
(If the element size is /// smaller than the vector, it is splatted into all the elements.) - static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) { + inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) { unsigned OpCmode = getNEONModImmOpCmode(ModImm); unsigned Imm8 = getNEONModImmVal(ModImm); uint64_t Val = 0; @@ -599,7 +576,7 @@ namespace ARM_AM { } // Generic validation for single-byte immediate (0X00, 00X0, etc). - static inline bool isNEONBytesplat(unsigned Value, unsigned Size) { + inline bool isNEONBytesplat(unsigned Value, unsigned Size) { assert(Size >= 1 && Size <= 4 && "Invalid size"); unsigned count = 0; for (unsigned i = 0; i < Size; ++i) { @@ -610,7 +587,7 @@ namespace ARM_AM { } /// Checks if Value is a correct immediate for instructions like VBIC/VORR. - static inline bool isNEONi16splat(unsigned Value) { + inline bool isNEONi16splat(unsigned Value) { if (Value > 0xffff) return false; // i16 value with set bits only in one byte X0 or 0X. @@ -618,7 +595,7 @@ namespace ARM_AM { } // Encode NEON 16 bits Splat immediate for instructions like VBIC/VORR - static inline unsigned encodeNEONi16splat(unsigned Value) { + inline unsigned encodeNEONi16splat(unsigned Value) { assert(isNEONi16splat(Value) && "Invalid NEON splat value"); if (Value >= 0x100) Value = (Value >> 8) | 0xa00; @@ -628,13 +605,13 @@ namespace ARM_AM { } /// Checks if Value is a correct immediate for instructions like VBIC/VORR. - static inline bool isNEONi32splat(unsigned Value) { + inline bool isNEONi32splat(unsigned Value) { // i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X. return Value == 0 || isNEONBytesplat(Value, 4); } /// Encode NEON 32 bits Splat immediate for instructions like VBIC/VORR. - static inline unsigned encodeNEONi32splat(unsigned Value) { + inline unsigned encodeNEONi32splat(unsigned Value) { assert(isNEONi32splat(Value) && "Invalid NEON splat value"); if (Value >= 0x100 && Value <= 0xff00) Value = (Value >> 8) | 0x200; @@ -648,7 +625,7 @@ namespace ARM_AM { //===--------------------------------------------------------------------===// // Floating-point Immediates // - static inline float getFPImmFloat(unsigned Imm) { + inline float getFPImmFloat(unsigned Imm) { // We expect an 8-bit binary encoding of a floating-point number here. union { uint32_t I; @@ -676,7 +653,7 @@ namespace ARM_AM { /// getFP16Imm - Return an 8-bit floating-point version of the 16-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. - static inline int getFP16Imm(const APInt &Imm) { + inline int getFP16Imm(const APInt &Imm) { uint32_t Sign = Imm.lshr(15).getZExtValue() & 1; int32_t Exp = (Imm.lshr(10).getSExtValue() & 0x1f) - 15; // -14 to 15 int64_t Mantissa = Imm.getZExtValue() & 0x3ff; // 10 bits @@ -695,14 +672,14 @@ namespace ARM_AM { return ((int)Sign << 7) | (Exp << 4) | Mantissa; } - static inline int getFP16Imm(const APFloat &FPImm) { + inline int getFP16Imm(const APFloat &FPImm) { return getFP16Imm(FPImm.bitcastToAPInt()); } /// getFP32Imm - Return an 8-bit floating-point version of the 32-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. 
- static inline int getFP32Imm(const APInt &Imm) { + inline int getFP32Imm(const APInt &Imm) { uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits @@ -723,14 +700,14 @@ namespace ARM_AM { return ((int)Sign << 7) | (Exp << 4) | Mantissa; } - static inline int getFP32Imm(const APFloat &FPImm) { + inline int getFP32Imm(const APFloat &FPImm) { return getFP32Imm(FPImm.bitcastToAPInt()); } /// getFP64Imm - Return an 8-bit floating-point version of the 64-bit /// floating-point value. If the value cannot be represented as an 8-bit /// floating-point value, then return -1. - static inline int getFP64Imm(const APInt &Imm) { + inline int getFP64Imm(const APInt &Imm) { uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffULL; @@ -751,7 +728,7 @@ namespace ARM_AM { return ((int)Sign << 7) | (Exp << 4) | Mantissa; } - static inline int getFP64Imm(const APFloat &FPImm) { + inline int getFP64Imm(const APFloat &FPImm) { return getFP64Imm(FPImm.bitcastToAPInt()); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index a77df7a2598f4..1cb9dd44f7894 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -12,7 +12,6 @@ #include "MCTargetDesc/ARMAsmBackendDarwin.h" #include "MCTargetDesc/ARMAsmBackendELF.h" #include "MCTargetDesc/ARMAsmBackendWinCOFF.h" -#include "MCTargetDesc/ARMBaseInfo.h" #include "MCTargetDesc/ARMFixupKinds.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/StringSwitch.h" @@ -25,7 +24,6 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixupKindInfo.h" -#include "llvm/MC/MCMachObjectWriter.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionELF.h" @@ -1127,30 +1125,30 @@ uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding( } static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) { - unsigned AK = ARM::parseArch(Arch); + ARM::ArchKind AK = ARM::parseArch(Arch); switch (AK) { default: return MachO::CPU_SUBTYPE_ARM_V7; - case ARM::AK_ARMV4T: + case ARM::ArchKind::ARMV4T: return MachO::CPU_SUBTYPE_ARM_V4T; - case ARM::AK_ARMV5T: - case ARM::AK_ARMV5TE: - case ARM::AK_ARMV5TEJ: + case ARM::ArchKind::ARMV5T: + case ARM::ArchKind::ARMV5TE: + case ARM::ArchKind::ARMV5TEJ: return MachO::CPU_SUBTYPE_ARM_V5; - case ARM::AK_ARMV6: - case ARM::AK_ARMV6K: + case ARM::ArchKind::ARMV6: + case ARM::ArchKind::ARMV6K: return MachO::CPU_SUBTYPE_ARM_V6; - case ARM::AK_ARMV7A: + case ARM::ArchKind::ARMV7A: return MachO::CPU_SUBTYPE_ARM_V7; - case ARM::AK_ARMV7S: + case ARM::ArchKind::ARMV7S: return MachO::CPU_SUBTYPE_ARM_V7S; - case ARM::AK_ARMV7K: + case ARM::ArchKind::ARMV7K: return MachO::CPU_SUBTYPE_ARM_V7K; - case ARM::AK_ARMV6M: + case ARM::ArchKind::ARMV6M: return MachO::CPU_SUBTYPE_ARM_V6M; - case ARM::AK_ARMV7M: + case ARM::ArchKind::ARMV7M: return MachO::CPU_SUBTYPE_ARM_V7M; - case ARM::AK_ARMV7EM: + case ARM::ArchKind::ARMV7EM: return MachO::CPU_SUBTYPE_ARM_V7EM; } } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h index bd729fabedf5a..f05e3a6f1160f 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h +++ 
b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h @@ -12,6 +12,7 @@ #include "ARMAsmBackend.h" #include "llvm/BinaryFormat/MachO.h" +#include "llvm/MC/MCObjectWriter.h" namespace llvm { class ARMAsmBackendDarwin : public ARMAsmBackend { @@ -23,7 +24,8 @@ public: : ARMAsmBackend(T, TT, /* IsLittleEndian */ true), MRI(MRI), Subtype(st) { } - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override { return createARMMachObjectWriter(OS, /*Is64Bit=*/false, MachO::CPU_TYPE_ARM, Subtype); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h index 748f915be17bb..d0f5419a1b0ff 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendELF.h @@ -12,6 +12,8 @@ #include "ARMAsmBackend.h" #include "MCTargetDesc/ARMMCTargetDesc.h" +#include "llvm/MC/MCObjectWriter.h" + using namespace llvm; namespace { @@ -22,7 +24,8 @@ public: bool IsLittle) : ARMAsmBackend(T, TT, IsLittle), OSABI(OSABI) {} - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override { return createARMELFObjectWriter(OS, OSABI, isLittle()); } }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h index 2a375be49a830..53b9c29446a33 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackendWinCOFF.h @@ -11,6 +11,7 @@ #define LLVM_LIB_TARGET_ARM_ARMASMBACKENDWINCOFF_H #include "ARMAsmBackend.h" +#include "llvm/MC/MCObjectWriter.h" using namespace llvm; namespace { @@ -18,7 +19,8 @@ class ARMAsmBackendWinCOFF : public ARMAsmBackend { public: ARMAsmBackendWinCOFF(const Target &T, const Triple &TheTriple) : ARMAsmBackend(T, TheTriple, true) {} - MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override { + std::unique_ptr<MCObjectWriter> + createObjectWriter(raw_pwrite_stream &OS) const override { return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false); } }; diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 92e553f21f143..c4480e3da505e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -19,73 +19,10 @@ #include "ARMMCTargetDesc.h" #include "llvm/Support/ErrorHandling.h" +#include "Utils/ARMBaseInfo.h" namespace llvm { -// Enums corresponding to ARM condition codes -namespace ARMCC { - // The CondCodes constants map directly to the 4-bit encoding of the - // condition field for predicated instructions. 
- enum CondCodes { // Meaning (integer) Meaning (floating-point) - EQ, // Equal Equal - NE, // Not equal Not equal, or unordered - HS, // Carry set >, ==, or unordered - LO, // Carry clear Less than - MI, // Minus, negative Less than - PL, // Plus, positive or zero >, ==, or unordered - VS, // Overflow Unordered - VC, // No overflow Not unordered - HI, // Unsigned higher Greater than, or unordered - LS, // Unsigned lower or same Less than or equal - GE, // Greater than or equal Greater than or equal - LT, // Less than Less than, or unordered - GT, // Greater than Greater than - LE, // Less than or equal <, ==, or unordered - AL // Always (unconditional) Always (unconditional) - }; - - inline static CondCodes getOppositeCondition(CondCodes CC) { - switch (CC) { - default: llvm_unreachable("Unknown condition code"); - case EQ: return NE; - case NE: return EQ; - case HS: return LO; - case LO: return HS; - case MI: return PL; - case PL: return MI; - case VS: return VC; - case VC: return VS; - case HI: return LS; - case LS: return HI; - case GE: return LT; - case LT: return GE; - case GT: return LE; - case LE: return GT; - } - } -} // namespace ARMCC - -inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { - switch (CC) { - case ARMCC::EQ: return "eq"; - case ARMCC::NE: return "ne"; - case ARMCC::HS: return "hs"; - case ARMCC::LO: return "lo"; - case ARMCC::MI: return "mi"; - case ARMCC::PL: return "pl"; - case ARMCC::VS: return "vs"; - case ARMCC::VC: return "vc"; - case ARMCC::HI: return "hi"; - case ARMCC::LS: return "ls"; - case ARMCC::GE: return "ge"; - case ARMCC::LT: return "lt"; - case ARMCC::GT: return "gt"; - case ARMCC::LE: return "le"; - case ARMCC::AL: return "al"; - } - llvm_unreachable("Unknown condition code"); -} - namespace ARM_PROC { enum IMod { IE = 2, @@ -291,7 +228,10 @@ namespace ARMII { /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects /// just that part of the flag set. - MO_OPTION_MASK = 0x0f, + MO_OPTION_MASK = 0x3, + + /// MO_GOT - On a symbol operand, this represents a GOT relative relocation. + MO_GOT = 0x8, /// MO_SBREL - On a symbol operand, this represents a static base relative /// relocation. Used in movw and movt instructions. @@ -406,6 +346,7 @@ namespace ARMII { NVExtFrm = 39 << FormShift, NVMulSLFrm = 40 << FormShift, NVTBLFrm = 41 << FormShift, + N3RegCplxFrm = 43 << FormShift, //===------------------------------------------------------------------===// // Misc flags. 
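The ARMBaseInfo.h hunk above narrows MO_OPTION_MASK to the low two bits so the new MO_GOT flag (0x8) can sit alongside the existing option bits on a symbol operand. A minimal sketch of reading the two parts back independently, using only the MO_OPTION_MASK and MO_GOT values shown in the hunk; the helper name, and the idea of querying the two parts this way, are illustrative and not part of the patch:

#include <utility>

// Split a symbol operand's ARMII target flags into the 2-bit option part and
// a boolean saying whether the GOT-relative bit is set.
static std::pair<unsigned, bool> splitARMTargetFlags(unsigned TF) {
  return {TF & ARMII::MO_OPTION_MASK, (TF & ARMII::MO_GOT) != 0};
}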
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp index 59f31be69d58c..3cd52fe1e7eb1 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCELFObjectWriter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -78,7 +79,6 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, MCContext &Ctx) const { MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant(); - unsigned Type = 0; if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { default: @@ -86,220 +86,159 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target, return ELF::R_ARM_NONE; case FK_Data_4: switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); + default: + llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_None: - Type = ELF::R_ARM_REL32; - break; - case MCSymbolRefExpr::VK_TLSGD: - llvm_unreachable("unimplemented"); + return ELF::R_ARM_REL32; case MCSymbolRefExpr::VK_GOTTPOFF: - Type = ELF::R_ARM_TLS_IE32; - break; + return ELF::R_ARM_TLS_IE32; case MCSymbolRefExpr::VK_ARM_GOT_PREL: - Type = ELF::R_ARM_GOT_PREL; - break; + return ELF::R_ARM_GOT_PREL; case MCSymbolRefExpr::VK_ARM_PREL31: - Type = ELF::R_ARM_PREL31; - break; + return ELF::R_ARM_PREL31; } - break; case ARM::fixup_arm_blx: case ARM::fixup_arm_uncondbl: switch (Modifier) { case MCSymbolRefExpr::VK_PLT: - Type = ELF::R_ARM_CALL; - break; + return ELF::R_ARM_CALL; case MCSymbolRefExpr::VK_TLSCALL: - Type = ELF::R_ARM_TLS_CALL; - break; + return ELF::R_ARM_TLS_CALL; default: - Type = ELF::R_ARM_CALL; - break; + return ELF::R_ARM_CALL; } - break; case ARM::fixup_arm_condbl: case ARM::fixup_arm_condbranch: case ARM::fixup_arm_uncondbranch: - Type = ELF::R_ARM_JUMP24; - break; + return ELF::R_ARM_JUMP24; case ARM::fixup_t2_condbranch: - Type = ELF::R_ARM_THM_JUMP19; - break; + return ELF::R_ARM_THM_JUMP19; case ARM::fixup_t2_uncondbranch: - Type = ELF::R_ARM_THM_JUMP24; - break; + return ELF::R_ARM_THM_JUMP24; case ARM::fixup_arm_movt_hi16: - Type = ELF::R_ARM_MOVT_PREL; - break; + return ELF::R_ARM_MOVT_PREL; case ARM::fixup_arm_movw_lo16: - Type = ELF::R_ARM_MOVW_PREL_NC; - break; + return ELF::R_ARM_MOVW_PREL_NC; case ARM::fixup_t2_movt_hi16: - Type = ELF::R_ARM_THM_MOVT_PREL; - break; + return ELF::R_ARM_THM_MOVT_PREL; case ARM::fixup_t2_movw_lo16: - Type = ELF::R_ARM_THM_MOVW_PREL_NC; - break; + return ELF::R_ARM_THM_MOVW_PREL_NC; case ARM::fixup_arm_thumb_br: - Type = ELF::R_ARM_THM_JUMP11; - break; + return ELF::R_ARM_THM_JUMP11; case ARM::fixup_arm_thumb_bcc: - Type = ELF::R_ARM_THM_JUMP8; - break; + return ELF::R_ARM_THM_JUMP8; case ARM::fixup_arm_thumb_bl: case ARM::fixup_arm_thumb_blx: switch (Modifier) { case MCSymbolRefExpr::VK_TLSCALL: - Type = ELF::R_ARM_THM_TLS_CALL; - break; + return ELF::R_ARM_THM_TLS_CALL; default: - Type = ELF::R_ARM_THM_CALL; - break; + return ELF::R_ARM_THM_CALL; } - break; } - } else { - switch ((unsigned)Fixup.getKind()) { + } + switch ((unsigned)Fixup.getKind()) { + default: + Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); + return ELF::R_ARM_NONE; + case FK_Data_1: + switch (Modifier) { default: - Ctx.reportFatalError(Fixup.getLoc(), "unsupported relocation on symbol"); + llvm_unreachable("unsupported 
Modifier"); + case MCSymbolRefExpr::VK_None: + return ELF::R_ARM_ABS8; + } + case FK_Data_2: + switch (Modifier) { + default: + llvm_unreachable("unsupported modifier"); + case MCSymbolRefExpr::VK_None: + return ELF::R_ARM_ABS16; + } + case FK_Data_4: + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_ARM_NONE: return ELF::R_ARM_NONE; - case FK_Data_1: - switch (Modifier) { - default: llvm_unreachable("unsupported Modifier"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_ARM_ABS8; - break; - } - break; - case FK_Data_2: - switch (Modifier) { - default: llvm_unreachable("unsupported modifier"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_ARM_ABS16; - break; - } - break; - case FK_Data_4: - switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_ARM_NONE: - Type = ELF::R_ARM_NONE; - break; - case MCSymbolRefExpr::VK_GOT: - Type = ELF::R_ARM_GOT_BREL; - break; - case MCSymbolRefExpr::VK_TLSGD: - Type = ELF::R_ARM_TLS_GD32; - break; - case MCSymbolRefExpr::VK_TPOFF: - Type = ELF::R_ARM_TLS_LE32; - break; - case MCSymbolRefExpr::VK_GOTTPOFF: - Type = ELF::R_ARM_TLS_IE32; - break; - case MCSymbolRefExpr::VK_None: - Type = ELF::R_ARM_ABS32; - break; - case MCSymbolRefExpr::VK_GOTOFF: - Type = ELF::R_ARM_GOTOFF32; - break; - case MCSymbolRefExpr::VK_ARM_GOT_PREL: - Type = ELF::R_ARM_GOT_PREL; - break; - case MCSymbolRefExpr::VK_ARM_TARGET1: - Type = ELF::R_ARM_TARGET1; - break; - case MCSymbolRefExpr::VK_ARM_TARGET2: - Type = ELF::R_ARM_TARGET2; - break; - case MCSymbolRefExpr::VK_ARM_PREL31: - Type = ELF::R_ARM_PREL31; - break; - case MCSymbolRefExpr::VK_ARM_SBREL: - Type = ELF::R_ARM_SBREL32; - break; - case MCSymbolRefExpr::VK_ARM_TLSLDO: - Type = ELF::R_ARM_TLS_LDO32; - break; - case MCSymbolRefExpr::VK_TLSCALL: - Type = ELF::R_ARM_TLS_CALL; - break; - case MCSymbolRefExpr::VK_TLSDESC: - Type = ELF::R_ARM_TLS_GOTDESC; - break; - case MCSymbolRefExpr::VK_TLSLDM: - Type = ELF::R_ARM_TLS_LDM32; - break; - case MCSymbolRefExpr::VK_ARM_TLSDESCSEQ: - Type = ELF::R_ARM_TLS_DESCSEQ; - break; - } - break; - case ARM::fixup_arm_ldst_pcrel_12: - case ARM::fixup_arm_pcrel_10: - case ARM::fixup_arm_adr_pcrel_12: - case ARM::fixup_arm_thumb_bl: - case ARM::fixup_arm_thumb_cb: - case ARM::fixup_arm_thumb_cp: - case ARM::fixup_arm_thumb_br: - llvm_unreachable("Unimplemented"); - case ARM::fixup_arm_condbranch: - case ARM::fixup_arm_uncondbranch: - Type = ELF::R_ARM_JUMP24; - break; - case ARM::fixup_arm_movt_hi16: - switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_ARM_MOVT_ABS; - break; - case MCSymbolRefExpr::VK_ARM_SBREL: - Type = ELF:: R_ARM_MOVT_BREL; - break; - } - break; - case ARM::fixup_arm_movw_lo16: - switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_ARM_MOVW_ABS_NC; - break; - case MCSymbolRefExpr::VK_ARM_SBREL: - Type = ELF:: R_ARM_MOVW_BREL_NC; - break; - } - break; - case ARM::fixup_t2_movt_hi16: - switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_ARM_THM_MOVT_ABS; - break; - case MCSymbolRefExpr::VK_ARM_SBREL: - Type = ELF:: R_ARM_THM_MOVT_BREL; - break; - } - break; - case ARM::fixup_t2_movw_lo16: - switch (Modifier) { - default: llvm_unreachable("Unsupported Modifier"); - case MCSymbolRefExpr::VK_None: - Type = ELF::R_ARM_THM_MOVW_ABS_NC; - break; - case 
MCSymbolRefExpr::VK_ARM_SBREL: - Type = ELF:: R_ARM_THM_MOVW_BREL_NC; - break; - } - break; + case MCSymbolRefExpr::VK_GOT: + return ELF::R_ARM_GOT_BREL; + case MCSymbolRefExpr::VK_TLSGD: + return ELF::R_ARM_TLS_GD32; + case MCSymbolRefExpr::VK_TPOFF: + return ELF::R_ARM_TLS_LE32; + case MCSymbolRefExpr::VK_GOTTPOFF: + return ELF::R_ARM_TLS_IE32; + case MCSymbolRefExpr::VK_None: + return ELF::R_ARM_ABS32; + case MCSymbolRefExpr::VK_GOTOFF: + return ELF::R_ARM_GOTOFF32; + case MCSymbolRefExpr::VK_ARM_GOT_PREL: + return ELF::R_ARM_GOT_PREL; + case MCSymbolRefExpr::VK_ARM_TARGET1: + return ELF::R_ARM_TARGET1; + case MCSymbolRefExpr::VK_ARM_TARGET2: + return ELF::R_ARM_TARGET2; + case MCSymbolRefExpr::VK_ARM_PREL31: + return ELF::R_ARM_PREL31; + case MCSymbolRefExpr::VK_ARM_SBREL: + return ELF::R_ARM_SBREL32; + case MCSymbolRefExpr::VK_ARM_TLSLDO: + return ELF::R_ARM_TLS_LDO32; + case MCSymbolRefExpr::VK_TLSCALL: + return ELF::R_ARM_TLS_CALL; + case MCSymbolRefExpr::VK_TLSDESC: + return ELF::R_ARM_TLS_GOTDESC; + case MCSymbolRefExpr::VK_TLSLDM: + return ELF::R_ARM_TLS_LDM32; + case MCSymbolRefExpr::VK_ARM_TLSDESCSEQ: + return ELF::R_ARM_TLS_DESCSEQ; + } + case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: + return ELF::R_ARM_JUMP24; + case ARM::fixup_arm_movt_hi16: + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + return ELF::R_ARM_MOVT_ABS; + case MCSymbolRefExpr::VK_ARM_SBREL: + return ELF::R_ARM_MOVT_BREL; + } + case ARM::fixup_arm_movw_lo16: + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + return ELF::R_ARM_MOVW_ABS_NC; + case MCSymbolRefExpr::VK_ARM_SBREL: + return ELF::R_ARM_MOVW_BREL_NC; + } + case ARM::fixup_t2_movt_hi16: + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + return ELF::R_ARM_THM_MOVT_ABS; + case MCSymbolRefExpr::VK_ARM_SBREL: + return ELF::R_ARM_THM_MOVT_BREL; + } + case ARM::fixup_t2_movw_lo16: + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + return ELF::R_ARM_THM_MOVW_ABS_NC; + case MCSymbolRefExpr::VK_ARM_SBREL: + return ELF::R_ARM_THM_MOVW_BREL_NC; } } - - return Type; } -MCObjectWriter *llvm::createARMELFObjectWriter(raw_pwrite_stream &OS, - uint8_t OSABI, - bool IsLittleEndian) { - MCELFObjectTargetWriter *MOTW = new ARMELFObjectWriter(OSABI); - return createELFObjectWriter(MOTW, OS, IsLittleEndian); +std::unique_ptr<MCObjectWriter> +llvm::createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, + bool IsLittleEndian) { + return createELFObjectWriter(llvm::make_unique<ARMELFObjectWriter>(OSABI), OS, + IsLittleEndian); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 93f4006cee876..d465da1a7bb10 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -92,9 +92,9 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void emitTextAttribute(unsigned Attribute, StringRef String) override; void emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) override; - void emitArch(unsigned Arch) override; + void emitArch(ARM::ArchKind Arch) override; void emitArchExtension(unsigned ArchExt) override; - void emitObjectArch(unsigned Arch) override; + void emitObjectArch(ARM::ArchKind Arch) override; void emitFPU(unsigned FPU) override; void 
emitInst(uint32_t Inst, char Suffix = '\0') override; void finishAttributeSection() override; @@ -218,7 +218,7 @@ void ARMTargetAsmStreamer::emitIntTextAttribute(unsigned Attribute, OS << "\n"; } -void ARMTargetAsmStreamer::emitArch(unsigned Arch) { +void ARMTargetAsmStreamer::emitArch(ARM::ArchKind Arch) { OS << "\t.arch\t" << ARM::getArchName(Arch) << "\n"; } @@ -226,7 +226,7 @@ void ARMTargetAsmStreamer::emitArchExtension(unsigned ArchExt) { OS << "\t.arch_extension\t" << ARM::getArchExtName(ArchExt) << "\n"; } -void ARMTargetAsmStreamer::emitObjectArch(unsigned Arch) { +void ARMTargetAsmStreamer::emitObjectArch(ARM::ArchKind Arch) { OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n'; } @@ -303,8 +303,8 @@ private: StringRef CurrentVendor; unsigned FPU = ARM::FK_INVALID; - unsigned Arch = ARM::AK_INVALID; - unsigned EmittedArch = ARM::AK_INVALID; + ARM::ArchKind Arch = ARM::ArchKind::INVALID; + ARM::ArchKind EmittedArch = ARM::ArchKind::INVALID; SmallVector<AttributeItem, 64> Contents; MCSection *AttributeSection = nullptr; @@ -404,8 +404,8 @@ private: void emitTextAttribute(unsigned Attribute, StringRef String) override; void emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) override; - void emitArch(unsigned Arch) override; - void emitObjectArch(unsigned Arch) override; + void emitArch(ARM::ArchKind Arch) override; + void emitObjectArch(ARM::ArchKind Arch) override; void emitFPU(unsigned FPU) override; void emitInst(uint32_t Inst, char Suffix = '\0') override; void finishAttributeSection() override; @@ -440,9 +440,11 @@ class ARMELFStreamer : public MCELFStreamer { public: friend class ARMTargetELFStreamer; - ARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool IsThumb) - : MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb) { + ARMELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB, + raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> Emitter, + bool IsThumb) + : MCELFStreamer(Context, std::move(TAB), OS, std::move(Emitter)), + IsThumb(IsThumb) { EHReset(); } @@ -776,11 +778,11 @@ void ARMTargetELFStreamer::emitIntTextAttribute(unsigned Attribute, /* OverwriteExisting= */ true); } -void ARMTargetELFStreamer::emitArch(unsigned Value) { +void ARMTargetELFStreamer::emitArch(ARM::ArchKind Value) { Arch = Value; } -void ARMTargetELFStreamer::emitObjectArch(unsigned Value) { +void ARMTargetELFStreamer::emitObjectArch(ARM::ArchKind Value) { EmittedArch = Value; } @@ -791,7 +793,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { ARM::getCPUAttr(Arch), false); - if (EmittedArch == ARM::AK_INVALID) + if (EmittedArch == ARM::ArchKind::INVALID) setAttributeItem(CPU_arch, ARM::getArchAttr(Arch), false); @@ -801,58 +803,59 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { false); switch (Arch) { - case ARM::AK_ARMV2: - case ARM::AK_ARMV2A: - case ARM::AK_ARMV3: - case ARM::AK_ARMV3M: - case ARM::AK_ARMV4: + case ARM::ArchKind::ARMV2: + case ARM::ArchKind::ARMV2A: + case ARM::ArchKind::ARMV3: + case ARM::ArchKind::ARMV3M: + case ARM::ArchKind::ARMV4: setAttributeItem(ARM_ISA_use, Allowed, false); break; - case ARM::AK_ARMV4T: - case ARM::AK_ARMV5T: - case ARM::AK_ARMV5TE: - case ARM::AK_ARMV6: + case ARM::ArchKind::ARMV4T: + case ARM::ArchKind::ARMV5T: + case ARM::ArchKind::ARMV5TE: + case ARM::ArchKind::ARMV6: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, Allowed, false); break; - case ARM::AK_ARMV6T2: + case 
ARM::ArchKind::ARMV6T2: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); break; - case ARM::AK_ARMV6K: - case ARM::AK_ARMV6KZ: + case ARM::ArchKind::ARMV6K: + case ARM::ArchKind::ARMV6KZ: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, Allowed, false); setAttributeItem(Virtualization_use, AllowTZ, false); break; - case ARM::AK_ARMV6M: + case ARM::ArchKind::ARMV6M: setAttributeItem(THUMB_ISA_use, Allowed, false); break; - case ARM::AK_ARMV7A: + case ARM::ArchKind::ARMV7A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); break; - case ARM::AK_ARMV7R: + case ARM::ArchKind::ARMV7R: setAttributeItem(CPU_arch_profile, RealTimeProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); break; - case ARM::AK_ARMV7M: + case ARM::ArchKind::ARMV7EM: + case ARM::ArchKind::ARMV7M: setAttributeItem(CPU_arch_profile, MicroControllerProfile, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); break; - case ARM::AK_ARMV8A: - case ARM::AK_ARMV8_1A: - case ARM::AK_ARMV8_2A: + case ARM::ArchKind::ARMV8A: + case ARM::ArchKind::ARMV8_1A: + case ARM::ArchKind::ARMV8_2A: setAttributeItem(CPU_arch_profile, ApplicationProfile, false); setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, AllowThumb32, false); @@ -860,26 +863,26 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() { setAttributeItem(Virtualization_use, AllowTZVirtualization, false); break; - case ARM::AK_ARMV8MBaseline: - case ARM::AK_ARMV8MMainline: + case ARM::ArchKind::ARMV8MBaseline: + case ARM::ArchKind::ARMV8MMainline: setAttributeItem(THUMB_ISA_use, AllowThumbDerived, false); setAttributeItem(CPU_arch_profile, MicroControllerProfile, false); break; - case ARM::AK_IWMMXT: + case ARM::ArchKind::IWMMXT: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, Allowed, false); setAttributeItem(WMMX_arch, AllowWMMXv1, false); break; - case ARM::AK_IWMMXT2: + case ARM::ArchKind::IWMMXT2: setAttributeItem(ARM_ISA_use, Allowed, false); setAttributeItem(THUMB_ISA_use, Allowed, false); setAttributeItem(WMMX_arch, AllowWMMXv2, false); break; default: - report_fatal_error("Unknown Arch: " + Twine(Arch)); + report_fatal_error("Unknown Arch: " + Twine(ARM::getArchName(Arch))); break; } } @@ -1057,7 +1060,7 @@ void ARMTargetELFStreamer::finishAttributeSection() { if (FPU != ARM::FK_INVALID) emitFPUDefaultAttributes(); - if (Arch != ARM::AK_INVALID) + if (Arch != ARM::ArchKind::INVALID) emitArchDefaultAttributes(); if (Contents.empty()) @@ -1169,6 +1172,8 @@ void ARMELFStreamer::reset() { ATS.reset(); MappingSymbolCounter = 0; MCELFStreamer::reset(); + LastMappingSymbols.clear(); + LastEMSInfo.reset(); // MCELFStreamer clear's the assembler's e_flags. 
However, for // arm we manually set the ABI version on streamer creation, so // do the same here @@ -1485,19 +1490,21 @@ MCTargetStreamer *createARMObjectTargetStreamer(MCStreamer &S, return new ARMTargetStreamer(S); } -MCELFStreamer *createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB, +MCELFStreamer *createARMELFStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> TAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll, - bool IsThumb) { - ARMELFStreamer *S = new ARMELFStreamer(Context, TAB, OS, Emitter, IsThumb); - // FIXME: This should eventually end up somewhere else where more - // intelligent flag decisions can be made. For now we are just maintaining - // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. - S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); + std::unique_ptr<MCCodeEmitter> Emitter, + bool RelaxAll, bool IsThumb) { + ARMELFStreamer *S = new ARMELFStreamer(Context, std::move(TAB), OS, + std::move(Emitter), IsThumb); + // FIXME: This should eventually end up somewhere else where more + // intelligent flag decisions can be made. For now we are just maintaining + // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. + S->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); - if (RelaxAll) - S->getAssembler().setRelaxAll(true); - return S; + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; } } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp index 1e062ad45af50..0cef683778e58 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp @@ -58,7 +58,6 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(const Triple &TheTriple) { // Exceptions handling switch (TheTriple.getOS()) { - case Triple::Bitrig: case Triple::NetBSD: ExceptionsType = ExceptionHandling::DwarfCFI; break; @@ -87,7 +86,7 @@ void ARMCOFFMCAsmInfoMicrosoft::anchor() { } ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() { AlignmentIsInBytes = false; - + ExceptionsType = ExceptionHandling::WinEH; PrivateGlobalPrefix = "$M"; PrivateLabelPrefix = "$M"; CommentString = ";"; @@ -106,10 +105,10 @@ ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() { PrivateLabelPrefix = ".L"; SupportsDebugInformation = true; - ExceptionsType = ExceptionHandling::None; + ExceptionsType = ExceptionHandling::DwarfCFI; UseParensForSymbolVariant = true; - UseIntegratedAssembler = false; - DwarfRegNumForCFI = true; + UseIntegratedAssembler = true; + DwarfRegNumForCFI = false; } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp index 2063ca6bdf3b8..306f068312f53 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "ARMMCExpr.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 2ab7bfe4410bd..ae5bc723ee5fc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -16,6 +16,8 @@ #include "ARMMCAsmInfo.h" #include "InstPrinter/ARMInstPrinter.h" #include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCELFStreamer.h" #include 
"llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" @@ -131,16 +133,13 @@ static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI, #include "ARMGenSubtargetInfo.inc" std::string ARM_MC::ParseARMTriple(const Triple &TT, StringRef CPU) { - bool isThumb = - TT.getArch() == Triple::thumb || TT.getArch() == Triple::thumbeb; - std::string ARMArchFeature; - unsigned ArchID = ARM::parseArch(TT.getArchName()); - if (ArchID != ARM::AK_INVALID && (CPU.empty() || CPU == "generic")) + ARM::ArchKind ArchID = ARM::parseArch(TT.getArchName()); + if (ArchID != ARM::ArchKind::INVALID && (CPU.empty() || CPU == "generic")) ARMArchFeature = (ARMArchFeature + "+" + ARM::getArchName(ArchID)).str(); - if (isThumb) { + if (TT.isThumb()) { if (ARMArchFeature.empty()) ARMArchFeature = "+thumb-mode,+v4t"; else @@ -201,18 +200,22 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, } static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx, - MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll) { - return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, - (T.getArch() == Triple::thumb || - T.getArch() == Triple::thumbeb)); + std::unique_ptr<MCAsmBackend> &&MAB, + raw_pwrite_stream &OS, + std::unique_ptr<MCCodeEmitter> &&Emitter, + bool RelaxAll) { + return createARMELFStreamer( + Ctx, std::move(MAB), OS, std::move(Emitter), false, + (T.getArch() == Triple::thumb || T.getArch() == Triple::thumbeb)); } -static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB, - raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll, - bool DWARFMustBeAtTheEnd) { - return createMachOStreamer(Ctx, MAB, OS, Emitter, false, DWARFMustBeAtTheEnd); +static MCStreamer * +createARMMachOStreamer(MCContext &Ctx, std::unique_ptr<MCAsmBackend> &&MAB, + raw_pwrite_stream &OS, + std::unique_ptr<MCCodeEmitter> &&Emitter, bool RelaxAll, + bool DWARFMustBeAtTheEnd) { + return createMachOStreamer(Ctx, std::move(MAB), OS, std::move(Emitter), false, + DWARFMustBeAtTheEnd); } static MCInstPrinter *createARMMCInstPrinter(const Triple &T, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index ba834201e585a..0fb97e5fee977 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -15,6 +15,7 @@ #define LLVM_LIB_TARGET_ARM_MCTARGETDESC_ARMMCTARGETDESC_H #include "llvm/Support/DataTypes.h" +#include <memory> #include <string> namespace llvm { @@ -92,23 +93,27 @@ MCAsmBackend *createThumbBEAsmBackend(const Target &T, // Construct a PE/COFF machine code streamer which will generate a PE/COFF // object file. -MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, +MCStreamer *createARMWinCOFFStreamer(MCContext &Context, + std::unique_ptr<MCAsmBackend> &&MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll, + std::unique_ptr<MCCodeEmitter> &&Emitter, + bool RelaxAll, bool IncrementalLinkerCompatible); /// Construct an ELF Mach-O object writer. -MCObjectWriter *createARMELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI, - bool IsLittleEndian); +std::unique_ptr<MCObjectWriter> createARMELFObjectWriter(raw_pwrite_stream &OS, + uint8_t OSABI, + bool IsLittleEndian); /// Construct an ARM Mach-O object writer. 
-MCObjectWriter *createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, - uint32_t CPUType, - uint32_t CPUSubtype); +std::unique_ptr<MCObjectWriter> createARMMachObjectWriter(raw_pwrite_stream &OS, + bool Is64Bit, + uint32_t CPUType, + uint32_t CPUSubtype); /// Construct an ARM PE/COFF object writer. -MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit); +std::unique_ptr<MCObjectWriter> +createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit); /// Construct ARM Mach-O relocation info. MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp index 5516a1bdb03da..6259c98321f4e 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp @@ -10,7 +10,6 @@ #include "ARMMCExpr.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm-c/Disassembler.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler/MCRelocationInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index 4a8139dea6682..521ae5337e7ac 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -322,6 +322,14 @@ bool ARMMachObjectWriter::requiresExternRelocation(MachObjectWriter *Writer, default: return false; case MachO::ARM_RELOC_BR24: + // An ARM call might be to a Thumb function, in which case the offset may + // not be encodable in the instruction and we must use an external + // relocation that explicitly mentions the function. Not a problem if it's + // to a temporary "Lwhatever" symbol though, and in fact trying to use an + // external relocation there causes more issues. + if (!S.isTemporary()) + return true; + // PC pre-adjustment of 8 for these instructions. Value -= 8; // ARM BL/BLX has a 25-bit offset. 
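The requiresExternRelocation change above makes every ARM_RELOC_BR24 fixup against a non-temporary symbol use an external relocation, since an ARM-mode BL/BLX may really target a Thumb function and that case must stay visible to the linker; assembler-temporary "L..." labels keep being resolved locally. A standalone restatement of that rule as a sketch; the function name is made up and not part of the patch:

#include "llvm/MC/MCSymbol.h"

// Mirrors the early-exit added above: only assembler-temporary symbols may
// have their BR24 offset folded directly into the branch instruction.
static bool br24NeedsExternReloc(const llvm::MCSymbol &Sym) {
  return !Sym.isTemporary();
}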
@@ -476,11 +484,10 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer, Writer->addRelocation(RelSymbol, Fragment->getParent(), MRE); } -MCObjectWriter *llvm::createARMMachObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit, uint32_t CPUType, - uint32_t CPUSubtype) { - return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit, - CPUType, - CPUSubtype), - OS, /*IsLittleEndian=*/true); +std::unique_ptr<MCObjectWriter> +llvm::createARMMachObjectWriter(raw_pwrite_stream &OS, bool Is64Bit, + uint32_t CPUType, uint32_t CPUSubtype) { + return createMachObjectWriter( + llvm::make_unique<ARMMachObjectWriter>(Is64Bit, CPUType, CPUSubtype), OS, + /*IsLittleEndian=*/true); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index 4a943187ab6da..42371736fef45 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -71,9 +71,9 @@ void ARMTargetStreamer::emitTextAttribute(unsigned Attribute, void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute, unsigned IntValue, StringRef StringValue) {} -void ARMTargetStreamer::emitArch(unsigned Arch) {} +void ARMTargetStreamer::emitArch(ARM::ArchKind Arch) {} void ARMTargetStreamer::emitArchExtension(unsigned ArchExt) {} -void ARMTargetStreamer::emitObjectArch(unsigned Arch) {} +void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {} void ARMTargetStreamer::emitFPU(unsigned FPU) {} void ARMTargetStreamer::finishAttributeSection() {} void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp index f74fb2e20b5a3..5e09b126f43fc 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" #include "llvm/MC/MCWinCOFFObjectWriter.h" #include "llvm/Support/ErrorHandling.h" @@ -90,10 +91,10 @@ bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const { namespace llvm { -MCObjectWriter *createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, - bool Is64Bit) { - MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit); - return createWinCOFFObjectWriter(MOTW, OS); +std::unique_ptr<MCObjectWriter> +createARMWinCOFFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit) { + auto MOTW = llvm::make_unique<ARMWinCOFFObjectWriter>(Is64Bit); + return createWinCOFFObjectWriter(std::move(MOTW), OS); } } // end namespace llvm diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp index 83fa084e60c75..a2424e1abab38 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp @@ -8,6 +8,8 @@ //===----------------------------------------------------------------------===// #include "ARMMCTargetDesc.h" +#include "llvm/MC/MCAsmBackend.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCWinCOFFStreamer.h" using namespace llvm; @@ -15,12 +17,13 @@ using namespace llvm; namespace { class ARMWinCOFFStreamer : public MCWinCOFFStreamer { public: - ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE, - raw_pwrite_stream &OS) - : MCWinCOFFStreamer(C, AB, CE, OS) {} + ARMWinCOFFStreamer(MCContext &C, 
std::unique_ptr<MCAsmBackend> AB, + std::unique_ptr<MCCodeEmitter> CE, raw_pwrite_stream &OS) + : MCWinCOFFStreamer(C, std::move(AB), std::move(CE), OS) {} void EmitAssemblerFlag(MCAssemblerFlag Flag) override; void EmitThumbFunc(MCSymbol *Symbol) override; + void FinishImpl() override; }; void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { @@ -35,12 +38,20 @@ void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) { getAssembler().setIsThumbFunc(Symbol); } + +void ARMWinCOFFStreamer::FinishImpl() { + EmitFrames(nullptr); + + MCWinCOFFStreamer::FinishImpl(); +} } MCStreamer *llvm::createARMWinCOFFStreamer( - MCContext &Context, MCAsmBackend &MAB, raw_pwrite_stream &OS, - MCCodeEmitter *Emitter, bool RelaxAll, bool IncrementalLinkerCompatible) { - auto *S = new ARMWinCOFFStreamer(Context, MAB, *Emitter, OS); + MCContext &Context, std::unique_ptr<MCAsmBackend> &&MAB, + raw_pwrite_stream &OS, std::unique_ptr<MCCodeEmitter> &&Emitter, + bool RelaxAll, bool IncrementalLinkerCompatible) { + auto *S = + new ARMWinCOFFStreamer(Context, std::move(MAB), std::move(Emitter), OS); S->getAssembler().setIncrementalLinkerCompatible(IncrementalLinkerCompatible); return S; } diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 744761bcddb87..153e7b1e21979 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -21,10 +21,10 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "mlx-expansion" @@ -371,7 +371,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { } bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { - if (skipFunction(*Fn.getFunction())) + if (skipFunction(Fn.getFunction())) return false; TII = static_cast<const ARMBaseInstrInfo *>(Fn.getSubtarget().getInstrInfo()); diff --git a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp index caa69f8d71b74..b0491a4108a62 100644 --- a/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp +++ b/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp @@ -30,12 +30,12 @@ Target &llvm::getTheThumbBETarget() { extern "C" void LLVMInitializeARMTargetInfo() { RegisterTarget<Triple::arm, /*HasJIT=*/true> X(getTheARMLETarget(), "arm", - "ARM"); + "ARM", "ARM"); RegisterTarget<Triple::armeb, /*HasJIT=*/true> Y(getTheARMBETarget(), "armeb", - "ARM (big endian)"); + "ARM (big endian)", "ARM"); RegisterTarget<Triple::thumb, /*HasJIT=*/true> A(getTheThumbLETarget(), - "thumb", "Thumb"); + "thumb", "Thumb", "ARM"); RegisterTarget<Triple::thumbeb, /*HasJIT=*/true> B( - getTheThumbBETarget(), "thumbeb", "Thumb (big endian)"); + getTheThumbBETarget(), "thumbeb", "Thumb (big endian)", "ARM"); } diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 5709b4e617987..ba00b3d79da97 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -1,4 +1,4 @@ -//===-- Thumb1FrameLowering.cpp - Thumb1 Frame Information ----------------===// +//===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===// // // The LLVM Compiler Infrastructure // @@ -16,12 +16,11 @@ 
#include "ARMBaseRegisterInfo.h" #include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" -#include "MCTargetDesc/ARMBaseInfo.h" #include "Thumb1InstrInfo.h" #include "ThumbRegisterInfo.h" +#include "Utils/ARMBaseInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -32,12 +31,17 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Support/MathExtras.h" +#include <bitset> #include <cassert> #include <iterator> #include <vector> @@ -69,7 +73,6 @@ static void emitSPUpdate(MachineBasicBlock &MBB, MRI, MIFlags); } - MachineBasicBlock::iterator Thumb1FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { @@ -349,10 +352,36 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); - // Thumb1 does not currently support dynamic stack realignment. Report a - // fatal error rather then silently generate bad code. - if (RegInfo->needsStackRealignment(MF)) - report_fatal_error("Dynamic stack realignment not supported for thumb1."); + if (RegInfo->needsStackRealignment(MF)) { + const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment()); + // Emit the following sequence, using R4 as a temporary, since we cannot use + // SP as a source or destination register for the shifts: + // mov r4, sp + // lsrs r4, r4, #NrBitsToZero + // lsls r4, r4, #NrBitsToZero + // mov sp, r4 + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) + .addReg(ARM::SP, RegState::Kill) + .add(predOps(ARMCC::AL)); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4) + .addDef(ARM::CPSR) + .addReg(ARM::R4, RegState::Kill) + .addImm(NrBitsToZero) + .add(predOps(ARMCC::AL)); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4) + .addDef(ARM::CPSR) + .addReg(ARM::R4, RegState::Kill) + .addImm(NrBitsToZero) + .add(predOps(ARMCC::AL)); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4, RegState::Kill) + .add(predOps(ARMCC::AL)); + + AFI->setShouldRestoreSPFromFP(true); + } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects @@ -483,6 +512,26 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { return false; } +static void findTemporariesForLR(const BitVector &GPRsNoLRSP, + const BitVector &PopFriendly, + const LivePhysRegs &UsedRegs, unsigned &PopReg, + unsigned &TmpReg) { + PopReg = TmpReg = 0; + for (auto Reg : GPRsNoLRSP.set_bits()) { + if (!UsedRegs.contains(Reg)) { + // Remember the first pop-friendly register and exit. + if (PopFriendly.test(Reg)) { + PopReg = Reg; + TmpReg = 0; + break; + } + // Otherwise, remember that the register will be available to + // save a pop-friendly register. 
+ TmpReg = Reg; + } + } +} + bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const { MachineFunction &MF = *MBB.getParent(); @@ -571,17 +620,19 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, GPRsNoLRSP.reset(ARM::LR); GPRsNoLRSP.reset(ARM::SP); GPRsNoLRSP.reset(ARM::PC); - for (unsigned Register : GPRsNoLRSP.set_bits()) { - if (!UsedRegs.contains(Register)) { - // Remember the first pop-friendly register and exit. - if (PopFriendly.test(Register)) { - PopReg = Register; - TemporaryReg = 0; - break; - } - // Otherwise, remember that the register will be available to - // save a pop-friendly register. - TemporaryReg = Register; + findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); + + // If we couldn't find a pop-friendly register, restore LR before popping the + // other callee-saved registers, so we can use one of them as a temporary. + bool UseLDRSP = false; + if (!PopReg && MBBI != MBB.begin()) { + auto PrevMBBI = MBBI; + PrevMBBI--; + if (PrevMBBI->getOpcode() == ARM::tPOP) { + MBBI = PrevMBBI; + UsedRegs.stepBackward(*MBBI); + findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg); + UseLDRSP = true; } } @@ -590,6 +641,26 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, assert((PopReg || TemporaryReg) && "Cannot get LR"); + if (UseLDRSP) { + assert(PopReg && "Do not know how to get LR"); + // Load the LR via LDR tmp, [SP, #off] + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi)) + .addReg(PopReg, RegState::Define) + .addReg(ARM::SP) + .addImm(MBBI->getNumExplicitOperands() - 2) + .add(predOps(ARMCC::AL)); + // Move from the temporary register to the LR. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill) + .add(predOps(ARMCC::AL)); + // Advance past the pop instruction. + MBBI++; + // Increment the SP. + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize + 4); + return true; + } + if (TemporaryReg) { assert(!PopReg && "Unnecessary MOV is about to be inserted"); PopReg = PopFriendly.find_first(); @@ -643,15 +714,15 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, return true; } +using ARMRegSet = std::bitset<ARM::NUM_TARGET_REGS>; + // Return the first iteraror after CurrentReg which is present in EnabledRegs, // or OrderEnd if no further registers are in that set. This does not advance // the iterator fiorst, so returns CurrentReg if it is in EnabledRegs. -template <unsigned SetSize> -static const unsigned * -findNextOrderedReg(const unsigned *CurrentReg, - SmallSet<unsigned, SetSize> &EnabledRegs, - const unsigned *OrderEnd) { - while (CurrentReg != OrderEnd && !EnabledRegs.count(*CurrentReg)) +static const unsigned *findNextOrderedReg(const unsigned *CurrentReg, + const ARMRegSet &EnabledRegs, + const unsigned *OrderEnd) { + while (CurrentReg != OrderEnd && !EnabledRegs[*CurrentReg]) ++CurrentReg; return CurrentReg; } @@ -670,18 +741,18 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( MF.getSubtarget().getRegisterInfo()); - SmallSet<unsigned, 9> LoRegsToSave; // r0-r7, lr - SmallSet<unsigned, 4> HiRegsToSave; // r8-r11 - SmallSet<unsigned, 9> CopyRegs; // Registers which can be used after pushing - // LoRegs for saving HiRegs. 
+ ARMRegSet LoRegsToSave; // r0-r7, lr + ARMRegSet HiRegsToSave; // r8-r11 + ARMRegSet CopyRegs; // Registers which can be used after pushing + // LoRegs for saving HiRegs. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToSave.insert(Reg); + LoRegsToSave[Reg] = true; } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToSave.insert(Reg); + HiRegsToSave[Reg] = true; } else { llvm_unreachable("callee-saved register of unexpected class"); } @@ -689,21 +760,21 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs.insert(Reg); + CopyRegs[Reg] = true; } // Unused argument registers can be used for the high register saving. for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) if (!MF.getRegInfo().isLiveIn(ArgReg)) - CopyRegs.insert(ArgReg); + CopyRegs[ArgReg] = true; // Push the low registers and lr const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!LoRegsToSave.empty()) { + if (!LoRegsToSave.none()) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) { - if (LoRegsToSave.count(Reg)) { + if (LoRegsToSave[Reg]) { bool isKill = !MRI.isLiveIn(Reg); if (isKill && !MRI.isReserved(Reg)) MBB.addLiveIn(Reg); @@ -746,7 +817,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, SmallVector<unsigned, 4> RegsToPush; while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { - if (HiRegsToSave.count(*HiRegToSave)) { + if (HiRegsToSave[*HiRegToSave]) { bool isKill = !MRI.isLiveIn(*HiRegToSave); if (isKill && !MRI.isReserved(*HiRegToSave)) MBB.addLiveIn(*HiRegToSave); @@ -780,7 +851,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool Thumb1FrameLowering:: restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, + std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -794,18 +865,18 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, bool isVarArg = AFI->getArgRegsSaveSize() > 0; DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); - SmallSet<unsigned, 9> LoRegsToRestore; - SmallSet<unsigned, 4> HiRegsToRestore; + ARMRegSet LoRegsToRestore; + ARMRegSet HiRegsToRestore; // Low registers (r0-r7) which can be used to restore the high registers. - SmallSet<unsigned, 9> CopyRegs; + ARMRegSet CopyRegs; for (CalleeSavedInfo I : CSI) { unsigned Reg = I.getReg(); if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { - LoRegsToRestore.insert(Reg); + LoRegsToRestore[Reg] = true; } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { - HiRegsToRestore.insert(Reg); + HiRegsToRestore[Reg] = true; } else { llvm_unreachable("callee-saved register of unexpected class"); } @@ -814,20 +885,20 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // use it for restoring the high registers. if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF))) - CopyRegs.insert(Reg); + CopyRegs[Reg] = true; } // If this is a return block, we may be able to use some unused return value // registers for restoring the high regs. 
auto Terminator = MBB.getFirstTerminator(); if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { - CopyRegs.insert(ARM::R0); - CopyRegs.insert(ARM::R1); - CopyRegs.insert(ARM::R2); - CopyRegs.insert(ARM::R3); + CopyRegs[ARM::R0] = true; + CopyRegs[ARM::R1] = true; + CopyRegs[ARM::R2] = true; + CopyRegs[ARM::R3] = true; for (auto Op : Terminator->implicit_operands()) { if (Op.isReg()) - CopyRegs.erase(Op.getReg()); + CopyRegs[Op.getReg()] = false; } } @@ -843,7 +914,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, HiRegsToRestore, AllHighRegsEnd); while (HiRegToRestore != AllHighRegsEnd) { - assert(!CopyRegs.empty()); + assert(!CopyRegs.none()); // Find the first low register to use. auto CopyReg = findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); @@ -873,40 +944,38 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, bool NeedsPop = false; for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); + CalleeSavedInfo &Info = CSI[i-1]; + unsigned Reg = Info.getReg(); // High registers (excluding lr) have already been dealt with if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR)) continue; if (Reg == ARM::LR) { - if (MBB.succ_empty()) { - // Special epilogue for vararg functions. See emitEpilogue - if (isVarArg) - continue; - // ARMv4T requires BX, see emitEpilogue - if (!STI.hasV5TOps()) - continue; - // Tailcall optimization failed; change TCRETURN to a tBL - if (MI->getOpcode() == ARM::TCRETURNdi || - MI->getOpcode() == ARM::TCRETURNri) { - unsigned Opcode = MI->getOpcode() == ARM::TCRETURNdi - ? ARM::tBL : ARM::tBLXr; - MachineInstrBuilder BL = BuildMI(MF, DL, TII.get(Opcode)); - BL.add(predOps(ARMCC::AL)); - BL.add(MI->getOperand(0)); - MBB.insert(MI, &*BL); - } - Reg = ARM::PC; - (*MIB).setDesc(TII.get(ARM::tPOP_RET)); - if (MI != MBB.end()) - MIB.copyImplicitOps(*MI); - MI = MBB.erase(MI); - } else + Info.setRestored(false); + if (!MBB.succ_empty() || + MI->getOpcode() == ARM::TCRETURNdi || + MI->getOpcode() == ARM::TCRETURNri) // LR may only be popped into PC, as part of return sequence. // If this isn't the return sequence, we'll need emitPopSpecialFixUp // to restore LR the hard way. + // FIXME: if we don't pass any stack arguments it would be actually + // advantageous *and* correct to do the conversion to an ordinary call + // instruction here. + continue; + // Special epilogue for vararg functions. See emitEpilogue + if (isVarArg) continue; + // ARMv4T requires BX, see emitEpilogue + if (!STI.hasV5TOps()) + continue; + + // Pop LR into PC. + Reg = ARM::PC; + (*MIB).setDesc(TII.get(ARM::tPOP_RET)); + if (MI != MBB.end()) + MIB.copyImplicitOps(*MI); + MI = MBB.erase(MI); } MIB.addReg(Reg, getDefRegState(true)); NeedsPop = true; diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index 9de1ba1d7009a..a4d6451ccf125 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -1,4 +1,4 @@ -//===-- Thumb1FrameLowering.h - Thumb1-specific frame info stuff --*- C++ -*-=// +//===- Thumb1FrameLowering.h - Thumb1-specific frame info stuff ---*- C++ -*-=// // // The LLVM Compiler Infrastructure // @@ -6,21 +6,17 @@ // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H #define LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H #include "ARMFrameLowering.h" -#include "Thumb1InstrInfo.h" -#include "ThumbRegisterInfo.h" -#include "llvm/Target/TargetFrameLowering.h" namespace llvm { +class ARMSubtarget; +class MachineFunction; + class Thumb1FrameLowering : public ARMFrameLowering { public: explicit Thumb1FrameLowering(const ARMSubtarget &sti); @@ -36,7 +32,7 @@ public: const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, + std::vector<CalleeSavedInfo> &CSI, const TargetRegisterInfo *TRI) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; @@ -88,6 +84,6 @@ private: bool emitPopSpecialFixUp(MachineBasicBlock &MBB, bool DoIt) const; }; -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_LIB_TARGET_ARM_THUMB1FRAMELOWERING_H diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index 3a3920a2db327..49645834e2de6 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -16,7 +16,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInst.h" using namespace llvm; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 9125be96a07b4..c5eb14f3e608c 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// +#include "Thumb2InstrInfo.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" -#include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrDesc.h" @@ -29,7 +30,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include <cassert> using namespace llvm; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index d911dd97b1ac7..3920c73fba6aa 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCInstrDesc.h" @@ -34,7 +35,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -449,7 +449,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, break; case 
ARM::t2LDR_POST: case ARM::t2STR_POST: { - if (!MBB.getParent()->getFunction()->optForMinSize()) + if (!MBB.getParent()->getFunction().optForMinSize()) return false; if (!MI->hasOneMemOperand() || @@ -1084,7 +1084,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { - if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + if (PredicateFtor && !PredicateFtor(MF.getFunction())) return false; STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget()); @@ -1094,8 +1094,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo()); // Optimizing / minimizing size? Minimizing size implies optimizing for size. - OptimizeSize = MF.getFunction()->optForSize(); - MinimizeSize = MF.getFunction()->optForMinSize(); + OptimizeSize = MF.getFunction().optForSize(); + MinimizeSize = MF.getFunction().optForMinSize(); BlockInfo.clear(); BlockInfo.resize(MF.getNumBlockIDs()); diff --git a/lib/Target/ARM/ThumbRegisterInfo.cpp b/lib/Target/ARM/ThumbRegisterInfo.cpp index 15a5675233364..d190edf5913c5 100644 --- a/lib/Target/ARM/ThumbRegisterInfo.cpp +++ b/lib/Target/ARM/ThumbRegisterInfo.cpp @@ -29,7 +29,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -70,7 +70,7 @@ static void emitThumb1LoadConstPool(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *STI.getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( - Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); + Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci)) @@ -89,7 +89,7 @@ static void emitThumb2LoadConstPool(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( - Type::getInt32Ty(MBB.getParent()->getFunction()->getContext()), Val); + Type::getInt32Ty(MBB.getParent()->getFunction().getContext()), Val); unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci)) diff --git a/lib/Target/ARM/ThumbRegisterInfo.h b/lib/Target/ARM/ThumbRegisterInfo.h index e6b06959e4285..75c3fe9ae8ad5 100644 --- a/lib/Target/ARM/ThumbRegisterInfo.h +++ b/lib/Target/ARM/ThumbRegisterInfo.h @@ -17,7 +17,7 @@ #define LLVM_LIB_TARGET_ARM_THUMB1REGISTERINFO_H #include "ARMBaseRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" namespace llvm { class ARMSubtarget; diff --git a/lib/Target/ARM/Utils/ARMBaseInfo.cpp b/lib/Target/ARM/Utils/ARMBaseInfo.cpp new file mode 100644 index 0000000000000..534f78c6d4d2c --- /dev/null +++ b/lib/Target/ARM/Utils/ARMBaseInfo.cpp @@ -0,0 +1,47 @@ +//===-- ARMBaseInfo.cpp - ARM Base encoding information------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides basic encoding and assembly information for ARM. 
+// +//===----------------------------------------------------------------------===// +#include "ARMBaseInfo.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" + +using namespace llvm; +namespace llvm { +namespace ARMSysReg { + +// lookup system register using 12-bit SYSm value. +// Note: the search is uniqued using M1 mask +const MClassSysReg *lookupMClassSysRegBy12bitSYSmValue(unsigned SYSm) { + return lookupMClassSysRegByM1Encoding12(SYSm); +} + +// returns APSR with _<bits> qualifier. +// Note: ARMv7-M deprecates using MSR APSR without a _<bits> qualifier +const MClassSysReg *lookupMClassSysRegAPSRNonDeprecated(unsigned SYSm) { + return lookupMClassSysRegByM2M3Encoding8((1<<9)|(SYSm & 0xFF)); +} + +// lookup system registers using 8-bit SYSm value +const MClassSysReg *lookupMClassSysRegBy8bitSYSmValue(unsigned SYSm) { + return ARMSysReg::lookupMClassSysRegByM2M3Encoding8((1<<8)|(SYSm & 0xFF)); +} + +#define GET_MCLASSSYSREG_IMPL +#include "ARMGenSystemRegister.inc" + +} // end namespace ARMSysReg + +namespace ARMBankedReg { +#define GET_BANKEDREG_IMPL +#include "ARMGenSystemRegister.inc" +} // end namespace ARMBankedReg +} // end namespace llvm diff --git a/lib/Target/ARM/Utils/ARMBaseInfo.h b/lib/Target/ARM/Utils/ARMBaseInfo.h new file mode 100644 index 0000000000000..f32d8223f53ce --- /dev/null +++ b/lib/Target/ARM/Utils/ARMBaseInfo.h @@ -0,0 +1,161 @@ +//===-- ARMBaseInfo.h - Top level definitions for ARM ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains small standalone helper functions and enum definitions for +// the ARM target useful for the compiler back-end and the MC libraries. +// As such, it deliberately does not include references to LLVM core +// code gen types, passes, etc.. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_ARM_UTILS_ARMBASEINFO_H +#define LLVM_LIB_TARGET_ARM_UTILS_ARMBASEINFO_H + +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/SubtargetFeature.h" +#include "MCTargetDesc/ARMMCTargetDesc.h" + +namespace llvm { + +// Enums corresponding to ARM condition codes +namespace ARMCC { +// The CondCodes constants map directly to the 4-bit encoding of the +// condition field for predicated instructions.
+enum CondCodes { // Meaning (integer) Meaning (floating-point) + EQ, // Equal Equal + NE, // Not equal Not equal, or unordered + HS, // Carry set >, ==, or unordered + LO, // Carry clear Less than + MI, // Minus, negative Less than + PL, // Plus, positive or zero >, ==, or unordered + VS, // Overflow Unordered + VC, // No overflow Not unordered + HI, // Unsigned higher Greater than, or unordered + LS, // Unsigned lower or same Less than or equal + GE, // Greater than or equal Greater than or equal + LT, // Less than Less than, or unordered + GT, // Greater than Greater than + LE, // Less than or equal <, ==, or unordered + AL // Always (unconditional) Always (unconditional) +}; + +inline static CondCodes getOppositeCondition(CondCodes CC) { + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case EQ: return NE; + case NE: return EQ; + case HS: return LO; + case LO: return HS; + case MI: return PL; + case PL: return MI; + case VS: return VC; + case VC: return VS; + case HI: return LS; + case LS: return HI; + case GE: return LT; + case LT: return GE; + case GT: return LE; + case LE: return GT; + } +} +} // end namespace ARMCC + +inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { + switch (CC) { + case ARMCC::EQ: return "eq"; + case ARMCC::NE: return "ne"; + case ARMCC::HS: return "hs"; + case ARMCC::LO: return "lo"; + case ARMCC::MI: return "mi"; + case ARMCC::PL: return "pl"; + case ARMCC::VS: return "vs"; + case ARMCC::VC: return "vc"; + case ARMCC::HI: return "hi"; + case ARMCC::LS: return "ls"; + case ARMCC::GE: return "ge"; + case ARMCC::LT: return "lt"; + case ARMCC::GT: return "gt"; + case ARMCC::LE: return "le"; + case ARMCC::AL: return "al"; + } + llvm_unreachable("Unknown condition code"); +} + +inline static unsigned ARMCondCodeFromString(StringRef CC) { + return StringSwitch<unsigned>(CC.lower()) + .Case("eq", ARMCC::EQ) + .Case("ne", ARMCC::NE) + .Case("hs", ARMCC::HS) + .Case("cs", ARMCC::HS) + .Case("lo", ARMCC::LO) + .Case("cc", ARMCC::LO) + .Case("mi", ARMCC::MI) + .Case("pl", ARMCC::PL) + .Case("vs", ARMCC::VS) + .Case("vc", ARMCC::VC) + .Case("hi", ARMCC::HI) + .Case("ls", ARMCC::LS) + .Case("ge", ARMCC::GE) + .Case("lt", ARMCC::LT) + .Case("gt", ARMCC::GT) + .Case("le", ARMCC::LE) + .Case("al", ARMCC::AL) + .Default(~0U); +} + +// System Registers +namespace ARMSysReg { + struct MClassSysReg { + const char *Name; + uint16_t M1Encoding12; + uint16_t M2M3Encoding8; + uint16_t Encoding; + FeatureBitset FeaturesRequired; + + // return true if FeaturesRequired are all present in ActiveFeatures + bool hasRequiredFeatures(FeatureBitset ActiveFeatures) const { + return (FeaturesRequired & ActiveFeatures) == FeaturesRequired; + } + + // returns true if TestFeatures are all present in FeaturesRequired + bool isInRequiredFeatures(FeatureBitset TestFeatures) const { + return (FeaturesRequired & TestFeatures) == TestFeatures; + } + }; + + #define GET_MCLASSSYSREG_DECL + #include "ARMGenSystemRegister.inc" + + // lookup system register using 12-bit SYSm value. + // Note: the search is uniqued using M1 mask + const MClassSysReg *lookupMClassSysRegBy12bitSYSmValue(unsigned SYSm); + + // returns APSR with _<bits> qualifier. 
+ // Note: ARMv7-M deprecates using MSR APSR without a _<bits> qualifier + const MClassSysReg *lookupMClassSysRegAPSRNonDeprecated(unsigned SYSm); + + // lookup system registers using 8-bit SYSm value + const MClassSysReg *lookupMClassSysRegBy8bitSYSmValue(unsigned SYSm); + +} // end namespace ARMSysReg + +// Banked Registers +namespace ARMBankedReg { + struct BankedReg { + const char *Name; + uint16_t Encoding; + }; + #define GET_BANKEDREG_DECL + #include "ARMGenSystemRegister.inc" +} // end namespace ARMBankedReg + +} // end namespace llvm + +#endif // LLVM_LIB_TARGET_ARM_UTILS_ARMBASEINFO_H diff --git a/lib/Target/ARM/Utils/CMakeLists.txt b/lib/Target/ARM/Utils/CMakeLists.txt new file mode 100644 index 0000000000000..61bfe0ad73d05 --- /dev/null +++ b/lib/Target/ARM/Utils/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMARMUtils + ARMBaseInfo.cpp + ) diff --git a/lib/Target/ARM/Utils/LLVMBuild.txt b/lib/Target/ARM/Utils/LLVMBuild.txt new file mode 100644 index 0000000000000..bbbaef6332135 --- /dev/null +++ b/lib/Target/ARM/Utils/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/ARM/Utils/LLVMBuild.txt ----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ARMUtils +parent = ARM +required_libraries = Support +add_to_library_groups = ARM + |
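The stack-realignment sequence added to Thumb1FrameLowering::emitPrologue rounds SP down to the required power-of-two alignment with a right-shift/left-shift pair through R4, since Thumb1 cannot apply an AND (or a shift) directly to SP. The arithmetic can be checked in isolation; the sketch below is illustrative only: alignDownByShifts, the sample values, and the use of the GCC/Clang builtin __builtin_ctz (in place of llvm::countTrailingZeros) are assumptions of the example, not part of the patch.

  #include <cassert>
  #include <cstdint>
  #include <iostream>

  // Mimics the mov/lsrs/lsls/mov sequence the prologue emits: shifting the
  // low bits out and back in clears them, rounding SP down to the alignment.
  uint32_t alignDownByShifts(uint32_t SP, uint32_t MaxAlignment) {
    assert(MaxAlignment != 0 && (MaxAlignment & (MaxAlignment - 1)) == 0 &&
           "alignment must be a power of two");
    unsigned NrBitsToZero = __builtin_ctz(MaxAlignment); // countTrailingZeros
    uint32_t R4 = SP;     // mov  r4, sp
    R4 >>= NrBitsToZero;  // lsrs r4, r4, #NrBitsToZero
    R4 <<= NrBitsToZero;  // lsls r4, r4, #NrBitsToZero
    return R4;            // mov  sp, r4
  }

  int main() {
    // With a 16-byte maximum alignment, 0x1003C rounds down to 0x10030.
    std::cout << std::hex << alignDownByShifts(0x1003C, 16) << '\n';
    return 0;
  }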
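The patch also swaps the SmallSet-based callee-saved register sets for ARMRegSet, a std::bitset indexed by register number, so findNextOrderedReg now tests membership with operator[]. Below is a simplified stand-alone analogue of that walk; RegSet, NumRegs, the register numbers and the save order are invented for illustration and merely stand in for ARMRegSet, ARM::NUM_TARGET_REGS and the AllCopyRegs/AllHighRegs orders used in spillCalleeSavedRegisters.

  #include <bitset>
  #include <iostream>
  #include <iterator>

  constexpr unsigned NumRegs = 16;      // stand-in for ARM::NUM_TARGET_REGS
  using RegSet = std::bitset<NumRegs>;  // stand-in for ARMRegSet

  // Advance Current through the fixed Order array until a register that is
  // enabled in the set is found; return OrderEnd if none remains.
  static const unsigned *findNextOrderedReg(const unsigned *Current,
                                            const RegSet &Enabled,
                                            const unsigned *OrderEnd) {
    while (Current != OrderEnd && !Enabled[*Current])
      ++Current;
    return Current;
  }

  int main() {
    static const unsigned Order[] = {4, 5, 6, 7, 8}; // hypothetical save order
    RegSet ToSave;
    ToSave[6] = true;
    ToSave[8] = true;
    for (const unsigned *R =
             findNextOrderedReg(std::begin(Order), ToSave, std::end(Order));
         R != std::end(Order);
         R = findNextOrderedReg(R + 1, ToSave, std::end(Order)))
      std::cout << "save r" << *R << '\n'; // prints r6, then r8
    return 0;
  }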
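In the new Utils/ARMBaseInfo.h, MClassSysReg::hasRequiredFeatures and isInRequiredFeatures are plain subset tests over feature bitsets. A minimal sketch of the same test follows, with std::bitset<64> standing in for llvm::FeatureBitset; the width and the bit positions are arbitrary assumptions made for the example.

  #include <bitset>
  #include <iostream>

  using Features = std::bitset<64>; // simplified stand-in for FeatureBitset

  // True when every required feature is also active, i.e. the
  // (Required & Active) == Required test used by hasRequiredFeatures.
  bool hasRequiredFeatures(const Features &Required, const Features &Active) {
    return (Required & Active) == Required;
  }

  int main() {
    Features Required, Active;
    Required.set(3);       // hypothetical feature bit needed by the register
    Active.set(3).set(7);  // subtarget has bit 3 plus an unrelated bit 7
    std::cout << hasRequiredFeatures(Required, Active) << '\n'; // prints 1
    Active.reset(3);
    std::cout << hasRequiredFeatures(Required, Active) << '\n'; // prints 0
    return 0;
  }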
