Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 233
-rw-r--r--  lib/Analysis/AliasAnalysisCounter.cpp | 20
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 50
-rw-r--r--  lib/Analysis/AliasDebugger.cpp | 17
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 157
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 584
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 30
-rw-r--r--  lib/Analysis/CMakeLists.txt | 3
-rw-r--r--  lib/Analysis/CaptureTracking.cpp | 2
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 4
-rw-r--r--  lib/Analysis/DbgInfoPrinter.cpp | 6
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 192
-rw-r--r--  lib/Analysis/DomPrinter.cpp | 65
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp | 15
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp | 12
-rw-r--r--  lib/Analysis/IPA/FindUsedTypes.cpp | 4
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp | 81
-rw-r--r--  lib/Analysis/IVUsers.cpp | 39
-rw-r--r--  lib/Analysis/InlineCost.cpp | 8
-rw-r--r--  lib/Analysis/InstCount.cpp | 6
-rw-r--r--  lib/Analysis/IntervalPartition.cpp | 6
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 583
-rw-r--r--  lib/Analysis/LibCallAliasAnalysis.cpp | 14
-rw-r--r--  lib/Analysis/LibCallSemantics.cpp | 3
-rw-r--r--  lib/Analysis/Lint.cpp | 11
-rw-r--r--  lib/Analysis/LiveValues.cpp | 6
-rw-r--r--  lib/Analysis/LoopDependenceAnalysis.cpp | 4
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 45
-rw-r--r--  lib/Analysis/LoopPass.cpp | 16
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 44
-rw-r--r--  lib/Analysis/ModuleDebugInfoPrinter.cpp | 7
-rw-r--r--  lib/Analysis/PointerTracking.cpp | 55
-rw-r--r--  lib/Analysis/PostDominators.cpp | 8
-rw-r--r--  lib/Analysis/ProfileEstimatorPass.cpp | 14
-rw-r--r--  lib/Analysis/ProfileInfo.cpp | 13
-rw-r--r--  lib/Analysis/ProfileInfoLoaderPass.cpp | 14
-rw-r--r--  lib/Analysis/ProfileVerifierPass.cpp | 8
-rw-r--r--  lib/Analysis/RegionInfo.cpp | 749
-rw-r--r--  lib/Analysis/RegionPrinter.cpp | 186
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 743
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 17
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 74
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp | 114
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp | 191
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 238
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 2
-rw-r--r--  lib/AsmParser/LLParser.cpp | 161
-rw-r--r--  lib/AsmParser/LLParser.h | 14
-rw-r--r--  lib/AsmParser/LLToken.h | 4
-rw-r--r--  lib/AsmParser/Parser.cpp | 3
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 119
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h | 12
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 136
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriterPass.cpp | 2
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 139
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h | 9
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 32
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 10
-rw-r--r--  lib/CodeGen/Analysis.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 88
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 13
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 862
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 28
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 2
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 2
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 6
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 276
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 2
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 27
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 7
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 21
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 282
-rw-r--r--  lib/CodeGen/ELF.h | 120
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.cpp | 2
-rw-r--r--  lib/CodeGen/ELFWriter.cpp | 30
-rw-r--r--  lib/CodeGen/ELFWriter.h | 13
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 12
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 9
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 5
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 118
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 3
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 81
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 164
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 79
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 3
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 18
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 354
-rw-r--r--  lib/CodeGen/LowerSubregs.cpp | 22
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 32
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 41
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 8
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 3
-rw-r--r--  lib/CodeGen/MachineFunctionAnalysis.cpp | 4
-rw-r--r--  lib/CodeGen/MachineFunctionPrinterPass.cpp | 2
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 33
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 21
-rw-r--r--  lib/CodeGen/MachineLoopInfo.cpp | 6
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 15
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 138
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 230
-rw-r--r--  lib/CodeGen/OptimizeExts.cpp | 220
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 20
-rw-r--r--  lib/CodeGen/PBQP/HeuristicBase.h | 8
-rw-r--r--  lib/CodeGen/PBQP/HeuristicSolver.h | 9
-rw-r--r--  lib/CodeGen/PBQP/Heuristics/Briggs.h | 8
-rw-r--r--  lib/CodeGen/PBQP/Solution.h | 31
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 36
-rw-r--r--  lib/CodeGen/PHIElimination.h | 10
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 287
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 7
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 15
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 18
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 243
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.h | 9
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 136
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 154
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 47
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 3
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 34
-rw-r--r--  lib/CodeGen/RenderMachineFunction.cpp | 1014
-rw-r--r--  lib/CodeGen/RenderMachineFunction.h | 336
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 9
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 17
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 67
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 26
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 20
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 24
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 390
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 74
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 421
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 40
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 638
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 37
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 44
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 182
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 161
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.h | 3
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 2
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 3
-rw-r--r--  lib/CodeGen/Spiller.cpp | 98
-rw-r--r--  lib/CodeGen/Spiller.h | 19
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 1097
-rw-r--r--  lib/CodeGen/SplitKit.h | 321
-rw-r--r--  lib/CodeGen/Splitter.cpp | 817
-rw-r--r--  lib/CodeGen/Splitter.h | 99
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 8
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 9
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 9
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 19
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 13
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 6
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 50
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp | 15
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 3
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 7
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 103
-rw-r--r--  lib/CompilerDriver/Action.cpp | 13
-rw-r--r--  lib/CompilerDriver/BuiltinOptions.cpp | 4
-rw-r--r--  lib/CompilerDriver/CompilationGraph.cpp | 289
-rw-r--r--  lib/CompilerDriver/Main.cpp | 123
-rw-r--r--  lib/CompilerDriver/Makefile | 36
-rw-r--r--  lib/CompilerDriver/Plugin.cpp | 78
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 4
-rw-r--r--  lib/ExecutionEngine/JIT/Intercept.cpp | 12
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 10
-rw-r--r--  lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp | 10
-rw-r--r--  lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 440
-rw-r--r--  lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 16
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 307
-rw-r--r--  lib/Linker/LinkModules.cpp | 180
-rw-r--r--  lib/MC/CMakeLists.txt | 3
-rw-r--r--  lib/MC/ELFObjectWriter.cpp | 973
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 2
-rw-r--r--  lib/MC/MCAsmInfoDarwin.cpp | 3
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 3
-rw-r--r--  lib/MC/MCAssembler.cpp | 74
-rw-r--r--  lib/MC/MCContext.cpp | 88
-rw-r--r--  lib/MC/MCDisassembler/CMakeLists.txt | 7
-rw-r--r--  lib/MC/MCDisassembler/EDDisassembler.cpp | 402
-rw-r--r--  lib/MC/MCDisassembler/EDDisassembler.h | 271
-rw-r--r--  lib/MC/MCDisassembler/EDInfo.h | 73
-rw-r--r--  lib/MC/MCDisassembler/EDInst.cpp | 207
-rw-r--r--  lib/MC/MCDisassembler/EDInst.h | 182
-rw-r--r--  lib/MC/MCDisassembler/EDOperand.cpp | 282
-rw-r--r--  lib/MC/MCDisassembler/EDOperand.h | 91
-rw-r--r--  lib/MC/MCDisassembler/EDToken.cpp | 206
-rw-r--r--  lib/MC/MCDisassembler/EDToken.h | 139
-rw-r--r--  lib/MC/MCDisassembler/Makefile (renamed from lib/Target/MSIL/Makefile) | 12
-rw-r--r--  lib/MC/MCDwarf.cpp | 21
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 408
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 277
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 1
-rw-r--r--  lib/MC/MCObjectStreamer.cpp | 48
-rw-r--r--  lib/MC/MCParser/AsmLexer.cpp | 28
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 700
-rw-r--r--  lib/MC/MCParser/DarwinAsmParser.cpp | 237
-rw-r--r--  lib/MC/MCParser/ELFAsmParser.cpp | 235
-rw-r--r--  lib/MC/MCParser/MCAsmParser.cpp | 15
-rw-r--r--  lib/MC/MCParser/TargetAsmParser.cpp | 2
-rw-r--r--  lib/MC/MCStreamer.cpp | 3
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 2
-rw-r--r--  lib/MC/Makefile | 2
-rw-r--r--  lib/MC/WinCOFFObjectWriter.cpp | 737
-rw-r--r--  lib/MC/WinCOFFStreamer.cpp | 229
-rw-r--r--  lib/Support/APFloat.cpp | 1
-rw-r--r--  lib/Support/APInt.cpp | 13
-rw-r--r--  lib/Support/CMakeLists.txt | 2
-rw-r--r--  lib/Support/ConstantRange.cpp | 124
-rw-r--r--  lib/Support/CrashRecoveryContext.cpp | 204
-rw-r--r--  lib/Support/ErrorHandling.cpp | 34
-rw-r--r--  lib/Support/FoldingSet.cpp | 89
-rw-r--r--  lib/Support/PrettyStackTrace.cpp | 5
-rw-r--r--  lib/Support/SlowOperationInformer.cpp | 67
-rw-r--r--  lib/Support/SmallVector.cpp | 2
-rw-r--r--  lib/Support/Statistic.cpp | 14
-rw-r--r--  lib/Support/StringRef.cpp | 27
-rw-r--r--  lib/Support/SystemUtils.cpp | 10
-rw-r--r--  lib/Support/Triple.cpp | 243
-rw-r--r--  lib/Support/raw_ostream.cpp | 97
-rw-r--r--  lib/System/DynamicLibrary.cpp | 6
-rw-r--r--  lib/System/Path.cpp | 40
-rw-r--r--  lib/System/RWMutex.cpp | 18
-rw-r--r--  lib/System/ThreadLocal.cpp | 5
-rw-r--r--  lib/System/Unix/Path.inc | 12
-rw-r--r--  lib/System/Unix/Signals.inc | 44
-rw-r--r--  lib/System/Unix/ThreadLocal.inc | 1
-rw-r--r--  lib/System/Win32/Path.inc | 16
-rw-r--r--  lib/System/Win32/Signals.inc | 14
-rw-r--r--  lib/System/Win32/ThreadLocal.inc | 4
-rw-r--r--  lib/Target/ARM/ARM.h | 60
-rw-r--r--  lib/Target/ARM/ARM.td | 76
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h | 20
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp (renamed from lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp) | 171
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 134
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 81
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 563
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 29
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 4
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 72
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 96
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 359
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 665
-rw-r--r--  lib/Target/ARM/ARMGlobalMerge.cpp | 212
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 371
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 635
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 42
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 296
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 406
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 627
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 19
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 730
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 76
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 139
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp (renamed from lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp) | 0
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.h (renamed from lib/Target/ARM/AsmPrinter/ARMMCInstLower.h) | 0
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 11
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 160
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 22
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 14
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 1
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 245
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 66
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 2
-rw-r--r--  lib/Target/ARM/AsmPrinter/CMakeLists.txt | 2
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 10
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 13
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp | 183
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.h | 66
-rw-r--r--  lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 227
-rw-r--r--  lib/Target/ARM/Makefile | 5
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 2
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 147
-rw-r--r--  lib/Target/ARM/README.txt | 42
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 279
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 22
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 68
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 14
-rw-r--r--  lib/Target/Alpha/AlphaBranchSelector.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaCodeEmitter.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 26
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 6
-rw-r--r--  lib/Target/Alpha/AlphaLLRP.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.h | 5
-rw-r--r--  lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 28
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.h | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 7
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 12
-rw-r--r--  lib/Target/CellSPU/SPUCallingConv.td | 24
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 44
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 105
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 142
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 6
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 156
-rw-r--r--  lib/Target/CellSPU/SPUOperands.td | 6
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 11
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 5
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.td | 2
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 17
-rw-r--r--  lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp | 10
-rw-r--r--  lib/Target/MBlaze/MBlaze.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeCallingConv.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFPU.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFSL.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFormats.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 35
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 6
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsics.td | 12
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.td | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeSchedule.td | 2
-rw-r--r--  lib/Target/MSIL/CMakeLists.txt | 3
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 1706
-rw-r--r--  lib/Target/MSIL/MSILWriter.h | 258
-rw-r--r--  lib/Target/MSIL/README.TXT | 26
-rw-r--r--  lib/Target/MSIL/TargetInfo/CMakeLists.txt | 6
-rw-r--r--  lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp | 26
-rw-r--r--  lib/Target/MSIL/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/MSP430/MSP430BranchSelector.cpp | 7
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 23
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 12
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 7
-rw-r--r--  lib/Target/Mangler.cpp | 3
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 20
-rw-r--r--  lib/Target/Mips/Mips.td | 2
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 2
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 11
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 8
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 2
-rw-r--r--  lib/Target/Mips/MipsInstrFormats.td | 2
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 47
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 6
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 9
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 5
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 2
-rw-r--r--  lib/Target/Mips/MipsSchedule.td | 2
-rw-r--r--  lib/Target/PIC16/CMakeLists.txt | 2
-rw-r--r--  lib/Target/PIC16/PIC16.h | 7
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 10
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.h | 5
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.cpp | 15
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16MemSelOpt.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp | 5
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Overlay.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.cpp | 9
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.h | 5
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp | 7
-rw-r--r--  lib/Target/PowerPC/PPCCallingConv.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 39
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 82
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 19
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 22
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 5
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 4
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 4
-rw-r--r--  lib/Target/README.txt | 16
-rw-r--r--  lib/Target/Sparc/DelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/Sparc/FPMover.cpp | 2
-rw-r--r--  lib/Target/Sparc/Sparc.td | 2
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 40
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 6
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 12
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 5
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 25
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 3
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 9
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 7
-rw-r--r--  lib/Target/TargetData.cpp | 62
-rw-r--r--  lib/Target/TargetMachine.cpp | 20
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 274
-rw-r--r--  lib/Target/X86/AsmPrinter/CMakeLists.txt | 3
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 5
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 3
-rw-r--r--  lib/Target/X86/AsmPrinter/X86InstComments.cpp | 232
-rw-r--r--  lib/Target/X86/AsmPrinter/X86InstComments.h | 25
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp | 5
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h | 4
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 10
-rw-r--r--  lib/Target/X86/README-FPStack.txt | 4
-rw-r--r--  lib/Target/X86/README-SSE.txt | 42
-rw-r--r--  lib/Target/X86/README.txt | 104
-rw-r--r--  lib/Target/X86/SSEDomainFix.cpp | 2
-rw-r--r--  lib/Target/X86/X86.h | 5
-rw-r--r--  lib/Target/X86/X86.td | 4
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 45
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp (renamed from lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp) | 51
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h (renamed from lib/Target/X86/AsmPrinter/X86AsmPrinter.h) | 6
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 32
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 119
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 29
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 473
-rw-r--r--  lib/Target/X86/X86FloatingPointRegKill.cpp | 153
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 37
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 1207
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 58
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 156
-rw-r--r--  lib/Target/X86/X86InstrFMA.td | 60
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 6
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 33
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 80
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 692
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 30
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 145
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 2
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 1945
-rw-r--r--  lib/Target/X86/X86MCAsmInfo.cpp | 3
-rw-r--r--  lib/Target/X86/X86MCCodeEmitter.cpp | 57
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp (renamed from lib/Target/X86/AsmPrinter/X86MCInstLower.cpp) | 119
-rw-r--r--  lib/Target/X86/X86MCInstLower.h (renamed from lib/Target/X86/AsmPrinter/X86MCInstLower.h) | 13
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 103
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 9
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 17
-rw-r--r--  lib/Target/X86/X86ShuffleDecode.h | 155
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 10
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 8
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 44
-rw-r--r--  lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 12
-rw-r--r--  lib/Target/XCore/CMakeLists.txt | 2
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 21
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 29
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 6
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 12
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 5
-rw-r--r--  lib/Transforms/Hello/Hello.cpp | 11
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 13
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 34
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 26
-rw-r--r--  lib/Transforms/IPO/DeadTypeElimination.cpp | 4
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp | 157
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 12
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp | 5
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 12
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp | 10
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp | 6
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp | 8
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 12
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp | 8
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp | 23
-rw-r--r--  lib/Transforms/IPO/LowerSetJmp.cpp | 4
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 658
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 10
-rw-r--r--  lib/Transforms/IPO/PartialSpecialization.cpp | 46
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 6
-rw-r--r--  lib/Transforms/IPO/StripDeadPrototypes.cpp | 6
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 36
-rw-r--r--  lib/Transforms/IPO/StructRetPromotion.cpp | 26
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 46
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 35
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 287
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 22
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 28
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp | 307
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/EdgeProfiling.cpp | 6
-rw-r--r--  lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp | 1112
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/BasicBlockPlacement.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 3
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 36
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/CorrelatedValuePropagation.cpp | 200
-rw-r--r--  lib/Transforms/Scalar/DCE.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/GEPSplitter.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 15
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 18
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 217
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 728
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 34
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 182
-rw-r--r--  lib/Transforms/Scalar/LoopUnrollPass.cpp | 21
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 30
-rw-r--r--  lib/Transforms/Scalar/LowerAtomic.cpp | 161
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 21
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/Reg2Mem.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 41
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 49
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 27
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 68
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 25
-rw-r--r--  lib/Transforms/Utils/BasicInliner.cpp | 4
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp | 10
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp | 21
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 86
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp | 30
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 11
-rw-r--r--  lib/Transforms/Utils/InstructionNamer.cpp | 8
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp | 27
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 3
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 40
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 6
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 11
-rw-r--r--  lib/Transforms/Utils/LowerSwitch.cpp | 16
-rw-r--r--  lib/Transforms/Utils/Mem2Reg.cpp | 7
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 17
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 40
-rw-r--r--  lib/Transforms/Utils/SSI.cpp | 432
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 56
-rw-r--r--  lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 6
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 84
-rw-r--r--  lib/Transforms/Utils/ValueMapper.h | 29
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 167
-rw-r--r--  lib/VMCore/AutoUpgrade.cpp | 217
-rw-r--r--  lib/VMCore/CMakeLists.txt | 1
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 40
-rw-r--r--  lib/VMCore/Constants.cpp | 121
-rw-r--r--  lib/VMCore/ConstantsContext.h | 16
-rw-r--r--  lib/VMCore/Core.cpp | 92
-rw-r--r--  lib/VMCore/Dominators.cpp | 147
-rw-r--r--  lib/VMCore/Globals.cpp | 9
-rw-r--r--  lib/VMCore/InlineAsm.cpp | 2
-rw-r--r--  lib/VMCore/Instruction.cpp | 4
-rw-r--r--  lib/VMCore/Instructions.cpp | 49
-rw-r--r--  lib/VMCore/LLVMContext.cpp | 13
-rw-r--r--  lib/VMCore/LLVMContextImpl.cpp | 3
-rw-r--r--  lib/VMCore/LLVMContextImpl.h | 5
-rw-r--r--  lib/VMCore/Metadata.cpp | 170
-rw-r--r--  lib/VMCore/Module.cpp | 23
-rw-r--r--  lib/VMCore/Pass.cpp | 293
-rw-r--r--  lib/VMCore/PassManager.cpp | 330
-rw-r--r--  lib/VMCore/PassRegistry.cpp | 159
-rw-r--r--  lib/VMCore/PrintModulePass.cpp | 18
-rw-r--r--  lib/VMCore/Type.cpp | 265
-rw-r--r--  lib/VMCore/TypesContext.h | 72
-rw-r--r--  lib/VMCore/Use.cpp | 19
-rw-r--r--  lib/VMCore/Value.cpp | 35
-rw-r--r--  lib/VMCore/ValueSymbolTable.cpp | 2
-rw-r--r--  lib/VMCore/Verifier.cpp | 125
566 files changed, 32107 insertions(+), 18511 deletions(-)
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 503fbbdab8d67..1f2528fa560f2 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -65,10 +65,127 @@ void AliasAnalysis::copyValue(Value *From, Value *To) {
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
- // FIXME: we can do better.
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
+ // Don't assert AA because BasicAA calls us in order to make use of the
+ // logic here.
+
+ ModRefBehavior MRB = getModRefBehavior(CS);
+ if (MRB == DoesNotAccessMemory)
+ return NoModRef;
+
+ ModRefResult Mask = ModRef;
+ if (MRB == OnlyReadsMemory)
+ Mask = Ref;
+ else if (MRB == AliasAnalysis::AccessesArguments) {
+ bool doesAlias = false;
+ for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI)
+ if (!isNoAlias(*AI, ~0U, P, Size)) {
+ doesAlias = true;
+ break;
+ }
+
+ if (!doesAlias)
+ return NoModRef;
+ }
+
+ // If P points to a constant memory location, the call definitely could not
+ // modify the memory location.
+ if ((Mask & Mod) && pointsToConstantMemory(P))
+ Mask = ModRefResult(Mask & ~Mod);
+
+ // If this is BasicAA, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS, P, Size) & Mask);
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+ // Don't assert AA because BasicAA calls us in order to make use of the
+ // logic here.
+
+ // If CS1 or CS2 are readnone, they don't interact.
+ ModRefBehavior CS1B = getModRefBehavior(CS1);
+ if (CS1B == DoesNotAccessMemory) return NoModRef;
+
+ ModRefBehavior CS2B = getModRefBehavior(CS2);
+ if (CS2B == DoesNotAccessMemory) return NoModRef;
+
+ // If they both only read from memory, there is no dependence.
+ if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory)
+ return NoModRef;
+
+ AliasAnalysis::ModRefResult Mask = ModRef;
+
+ // If CS1 only reads memory, the only dependence on CS2 can be
+ // from CS1 reading memory written by CS2.
+ if (CS1B == OnlyReadsMemory)
+ Mask = ModRefResult(Mask & Ref);
+
+ // If CS2 only access memory through arguments, accumulate the mod/ref
+ // information from CS1's references to the memory referenced by
+ // CS2's arguments.
+ if (CS2B == AccessesArguments) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ for (ImmutableCallSite::arg_iterator
+ I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+ R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
+ if (R == Mask)
+ break;
+ }
+ return R;
+ }
+
+ // If CS1 only accesses memory through arguments, check if CS2 references
+ // any of the memory referenced by CS1's arguments. If not, return NoModRef.
+ if (CS1B == AccessesArguments) {
+ AliasAnalysis::ModRefResult R = NoModRef;
+ for (ImmutableCallSite::arg_iterator
+ I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
+ if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
+ R = Mask;
+ break;
+ }
+ if (R == NoModRef)
+ return R;
+ }
+
+ // If this is BasicAA, don't forward.
+ if (!AA) return Mask;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any mask we've managed to compute.
+ return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ // Don't assert AA because BasicAA calls us in order to make use of the
+ // logic here.
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // Call back into the alias analysis with the other form of getModRefBehavior
+ // to see if it can give a better response.
+ if (const Function *F = CS.getCalledFunction())
+ Min = getModRefBehavior(F);
+
+ // If this is BasicAA, don't forward.
+ if (!AA) return Min;
+
+ // Otherwise, fall back to the next AA in the chain. But we can merge
+ // in any result we've managed to compute.
+ return std::min(AA->getModRefBehavior(CS), Min);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(const Function *F) {
assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
- return AA->getModRefInfo(CS1, CS2);
+ return AA->getModRefBehavior(F);
}
@@ -77,87 +194,63 @@ AliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
//===----------------------------------------------------------------------===//
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(LoadInst *L, Value *P, unsigned Size) {
- return alias(L->getOperand(0), getTypeStoreSize(L->getType()),
- P, Size) ? Ref : NoModRef;
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Value *P, unsigned Size) {
+ // Be conservative in the face of volatile.
+ if (L->isVolatile())
+ return ModRef;
+
+ // If the load address doesn't alias the given address, it doesn't read
+ // or write the specified memory.
+ if (!alias(L->getOperand(0), getTypeStoreSize(L->getType()), P, Size))
+ return NoModRef;
+
+ // Otherwise, a load just reads.
+ return Ref;
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(StoreInst *S, Value *P, unsigned Size) {
- // If the stored address cannot alias the pointer in question, then the
- // pointer cannot be modified by the store.
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Value *P, unsigned Size) {
+ // Be conservative in the face of volatile.
+ if (S->isVolatile())
+ return ModRef;
+
+ // If the store address cannot alias the pointer in question, then the
+ // specified memory cannot be modified by the store.
if (!alias(S->getOperand(1),
getTypeStoreSize(S->getOperand(0)->getType()), P, Size))
return NoModRef;
// If the pointer is a pointer to constant memory, then it could not have been
// modified by this store.
- return pointsToConstantMemory(P) ? NoModRef : Mod;
-}
-
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getModRefBehavior(CallSite CS,
- std::vector<PointerAccessInfo> *Info) {
- if (CS.doesNotAccessMemory())
- // Can't do better than this.
- return DoesNotAccessMemory;
- ModRefBehavior MRB = getModRefBehavior(CS.getCalledFunction(), Info);
- if (MRB != DoesNotAccessMemory && CS.onlyReadsMemory())
- return OnlyReadsMemory;
- return MRB;
-}
-
-AliasAnalysis::ModRefBehavior
-AliasAnalysis::getModRefBehavior(Function *F,
- std::vector<PointerAccessInfo> *Info) {
- if (F) {
- if (F->doesNotAccessMemory())
- // Can't do better than this.
- return DoesNotAccessMemory;
- if (F->onlyReadsMemory())
- return OnlyReadsMemory;
- if (unsigned id = F->getIntrinsicID())
- return getModRefBehavior(id);
- }
- return UnknownModRefBehavior;
-}
+ if (pointsToConstantMemory(P))
+ return NoModRef;
-AliasAnalysis::ModRefBehavior AliasAnalysis::getModRefBehavior(unsigned iid) {
-#define GET_INTRINSIC_MODREF_BEHAVIOR
-#include "llvm/Intrinsics.gen"
-#undef GET_INTRINSIC_MODREF_BEHAVIOR
+ // Otherwise, a store just writes.
+ return Mod;
}
AliasAnalysis::ModRefResult
-AliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
- ModRefBehavior MRB = getModRefBehavior(CS);
- if (MRB == DoesNotAccessMemory)
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Value *P, unsigned Size) {
+ // If the va_arg address cannot alias the pointer in question, then the
+ // specified memory cannot be accessed by the va_arg.
+ if (!alias(V->getOperand(0), UnknownSize, P, Size))
return NoModRef;
-
- ModRefResult Mask = ModRef;
- if (MRB == OnlyReadsMemory)
- Mask = Ref;
- else if (MRB == AliasAnalysis::AccessesArguments) {
- bool doesAlias = false;
- for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
- AI != AE; ++AI)
- if (!isNoAlias(*AI, ~0U, P, Size)) {
- doesAlias = true;
- break;
- }
- if (!doesAlias)
- return NoModRef;
- }
+ // If the pointer is a pointer to constant memory, then it could not have been
+ // modified by this va_arg.
+ if (pointsToConstantMemory(P))
+ return NoModRef;
- if (!AA) return Mask;
+ // Otherwise, a va_arg reads and writes.
+ return ModRef;
+}
- // If P points to a constant memory location, the call definitely could not
- // modify the memory location.
- if ((Mask & Mod) && AA->pointsToConstantMemory(P))
- Mask = ModRefResult(Mask & ~Mod);
- return ModRefResult(Mask & AA->getModRefInfo(CS, P, Size));
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getIntrinsicModRefBehavior(unsigned iid) {
+#define GET_INTRINSIC_MODREF_BEHAVIOR
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_MODREF_BEHAVIOR
}
// AliasAnalysis destructor: DO NOT move this to the header file for
@@ -206,12 +299,12 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
const Value *Ptr, unsigned Size) {
assert(I1.getParent() == I2.getParent() &&
"Instructions not in same basic block!");
- BasicBlock::iterator I = const_cast<Instruction*>(&I1);
- BasicBlock::iterator E = const_cast<Instruction*>(&I2);
+ BasicBlock::const_iterator I = &I1;
+ BasicBlock::const_iterator E = &I2;
++E; // Convert from inclusive to exclusive range.
for (; I != E; ++I) // Check every instruction in range
- if (getModRefInfo(I, const_cast<Value*>(Ptr), Size) & Mod)
+ if (getModRefInfo(I, Ptr, Size) & Mod)
return true;
return false;
}
@@ -220,7 +313,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
/// function.
bool llvm::isNoAliasCall(const Value *V) {
if (isa<CallInst>(V) || isa<InvokeInst>(V))
- return CallSite(const_cast<Instruction*>(cast<Instruction>(V)))
+ return ImmutableCallSite(cast<Instruction>(V))
.paramHasAttr(0, Attribute::NoAlias);
return false;
}
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 1053955ea2339..b17804186a63a 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -34,7 +34,7 @@ namespace {
Module *M;
public:
static char ID; // Class identification, replacement for typeinfo
- AliasAnalysisCounter() : ModulePass(&ID) {
+ AliasAnalysisCounter() : ModulePass(ID) {
No = May = Must = 0;
NoMR = JustRef = JustMod = MR = 0;
}
@@ -87,8 +87,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -103,17 +103,18 @@ namespace {
AliasResult alias(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size);
- ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
- ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
};
}
char AliasAnalysisCounter::ID = 0;
-static RegisterPass<AliasAnalysisCounter>
-X("count-aa", "Count Alias Analysis Query Responses", false, true);
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
+ "Count Alias Analysis Query Responses", false, true, false);
ModulePass *llvm::createAliasAnalysisCounterPass() {
return new AliasAnalysisCounter();
@@ -146,7 +147,8 @@ AliasAnalysisCounter::alias(const Value *V1, unsigned V1Size,
}
AliasAnalysis::ModRefResult
-AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, P, Size);
const char *MRString;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 37ee9fc22c9b9..ce363cbc7bbd6 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -50,7 +50,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- AAEval() : FunctionPass(&ID) {}
+ AAEval() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
@@ -74,8 +74,8 @@ namespace {
}
char AAEval::ID = 0;
-static RegisterPass<AAEval>
-X("aa-eval", "Exhaustive Alias Analysis Precision Evaluator", false, true);
+INITIALIZE_PASS(AAEval, "aa-eval",
+ "Exhaustive Alias Analysis Precision Evaluator", false, true);
FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
@@ -107,6 +107,15 @@ PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
}
}
+static inline void
+PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
+ Module *M) {
+ if (P) {
+ errs() << " " << Msg << ": " << *CSA.getInstruction()
+ << " <-> " << *CSB.getInstruction() << '\n';
+ }
+}
+
static inline bool isInterestingPointer(Value *V) {
return V->getType()->isPointerTy()
&& !isa<ConstantPointerNull>(V);
@@ -126,8 +135,7 @@ bool AAEval::runOnFunction(Function &F) {
if (I->getType()->isPointerTy()) // Add all pointer instructions.
Pointers.insert(&*I);
Instruction &Inst = *I;
- CallSite CS = CallSite::get(&Inst);
- if (CS) {
+ if (CallSite CS = cast<Value>(&Inst)) {
Value *Callee = CS.getCalledValue();
// Skip actual functions for direct function calls.
if (!isa<Function>(Callee) && isInterestingPointer(Callee))
@@ -137,6 +145,7 @@ bool AAEval::runOnFunction(Function &F) {
AI != AE; ++AI)
if (isInterestingPointer(*AI))
Pointers.insert(*AI);
+ CallSites.insert(CS);
} else {
// Consider all operands.
for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
@@ -144,8 +153,6 @@ bool AAEval::runOnFunction(Function &F) {
if (isInterestingPointer(*OI))
Pointers.insert(*OI);
}
-
- if (CS.getInstruction()) CallSites.insert(CS);
}
if (PrintNoAlias || PrintMayAlias || PrintMustAlias ||
@@ -197,13 +204,13 @@ bool AAEval::runOnFunction(Function &F) {
PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
++NoModRef; break;
case AliasAnalysis::Mod:
- PrintModRefResults(" Mod", PrintMod, I, *V, F.getParent());
+ PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
++Mod; break;
case AliasAnalysis::Ref:
- PrintModRefResults(" Ref", PrintRef, I, *V, F.getParent());
+ PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
++Ref; break;
case AliasAnalysis::ModRef:
- PrintModRefResults(" ModRef", PrintModRef, I, *V, F.getParent());
+ PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
++ModRef; break;
default:
errs() << "Unknown alias query result!\n";
@@ -211,6 +218,29 @@ bool AAEval::runOnFunction(Function &F) {
}
}
+ // Mod/ref alias analysis: compare all pairs of calls
+ for (SetVector<CallSite>::iterator C = CallSites.begin(),
+ Ce = CallSites.end(); C != Ce; ++C) {
+ for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+ if (D == C)
+ continue;
+ switch (AA.getModRefInfo(*C, *D)) {
+ case AliasAnalysis::NoModRef:
+ PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+ ++NoModRef; break;
+ case AliasAnalysis::Mod:
+ PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+ ++Mod; break;
+ case AliasAnalysis::Ref:
+ PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+ ++Ref; break;
+ case AliasAnalysis::ModRef:
+ PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+ ++ModRef; break;
+ }
+ }
+ }
+
return false;
}
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index bc2d9c55d1837..b9fe64608c01c 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -39,7 +39,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
- AliasDebugger() : ModulePass(&ID) {}
+ AliasDebugger() : ModulePass(ID) {}
bool runOnModule(Module &M) {
InitializeAliasAnalysis(this); // set up super class
@@ -83,8 +83,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -99,12 +99,14 @@ namespace {
return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
}
- ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
assert(Vals.find(P) != Vals.end() && "Never seen value in AA before");
return AliasAnalysis::getModRefInfo(CS, P, Size);
}
- ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
return AliasAnalysis::getModRefInfo(CS1,CS2);
}
@@ -126,9 +128,8 @@ namespace {
}
char AliasDebugger::ID = 0;
-static RegisterPass<AliasDebugger>
-X("debug-aa", "AA use debugger", false, true);
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
+ "AA use debugger", false, true, false);
Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 02aff50d8a13a..e74543bb508aa 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -22,7 +22,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -35,6 +34,7 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
// Update the alias and access types of this set...
AccessTy |= AS.AccessTy;
AliasTy |= AS.AliasTy;
+ Volatile |= AS.Volatile;
if (AliasTy == MustAlias) {
// Check that these two merged sets really are must aliases. Since both
@@ -111,11 +111,11 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
*PtrListEnd = &Entry;
PtrListEnd = Entry.setPrevInList(PtrListEnd);
assert(*PtrListEnd == 0 && "End of list is not null?");
- addRef(); // Entry points to alias set...
+ addRef(); // Entry points to alias set.
}
void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) {
- CallSites.push_back(CS);
+ CallSites.push_back(CS.getInstruction());
AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS);
if (Behavior == AliasAnalysis::DoesNotAccessMemory)
@@ -140,7 +140,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
assert(CallSites.empty() && "Illegal must alias set!");
// If this is a set of MustAliases, only check to see if the pointer aliases
- // SOME value in the set...
+ // SOME value in the set.
PointerRec *SomePtr = getSomePointer();
assert(SomePtr && "Empty must-alias set??");
return AA.alias(SomePtr->getValue(), SomePtr->getSize(), Ptr, Size);
@@ -155,8 +155,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size,
// Check the call sites list and invoke list...
if (!CallSites.empty()) {
for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
- if (AA.getModRefInfo(CallSites[i], const_cast<Value*>(Ptr), Size)
- != AliasAnalysis::NoModRef)
+ if (AA.getModRefInfo(CallSites[i], Ptr, Size) != AliasAnalysis::NoModRef)
return true;
}
@@ -167,10 +166,11 @@ bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const {
if (AA.doesNotAccessMemory(CS))
return false;
- for (unsigned i = 0, e = CallSites.size(); i != e; ++i)
- if (AA.getModRefInfo(CallSites[i], CS) != AliasAnalysis::NoModRef ||
- AA.getModRefInfo(CS, CallSites[i]) != AliasAnalysis::NoModRef)
+ for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
+ if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef ||
+ AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef)
return true;
+ }
for (iterator I = begin(), E = end(); I != E; ++I)
if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) !=
@@ -200,14 +200,15 @@ void AliasSetTracker::clear() {
AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
unsigned Size) {
AliasSet *FoundSet = 0;
- for (iterator I = begin(), E = end(); I != E; ++I)
- if (!I->Forward && I->aliasesPointer(Ptr, Size, AA)) {
- if (FoundSet == 0) { // If this is the first alias set ptr can go into.
- FoundSet = I; // Remember it.
- } else { // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*I, *this); // Merge in contents.
- }
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesPointer(Ptr, Size, AA)) continue;
+
+ if (FoundSet == 0) { // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ } else { // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
}
+ }
return FoundSet;
}
@@ -226,15 +227,15 @@ bool AliasSetTracker::containsPointer(Value *Ptr, unsigned Size) const {
AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) {
AliasSet *FoundSet = 0;
- for (iterator I = begin(), E = end(); I != E; ++I)
- if (!I->Forward && I->aliasesCallSite(CS, AA)) {
- if (FoundSet == 0) { // If this is the first alias set ptr can go into.
- FoundSet = I; // Remember it.
- } else if (!I->Forward) { // Otherwise, we must merge the sets.
- FoundSet->mergeSetIn(*I, *this); // Merge in contents.
- }
- }
-
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward || !I->aliasesCallSite(CS, AA))
+ continue;
+
+ if (FoundSet == 0) // If this is the first alias set ptr can go into.
+ FoundSet = I; // Remember it.
+ else if (!I->Forward) // Otherwise, we must merge the sets.
+ FoundSet->mergeSetIn(*I, *this); // Merge in contents.
+ }
return FoundSet;
}
@@ -247,22 +248,24 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, unsigned Size,
bool *New) {
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
- // Check to see if the pointer is already known...
+ // Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
Entry.updateSize(Size);
// Return the set!
return *Entry.getAliasSet(*this)->getForwardedTarget(*this);
- } else if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) {
- // Add it to the alias set it aliases...
+ }
+
+ if (AliasSet *AS = findAliasSetForPointer(Pointer, Size)) {
+ // Add it to the alias set it aliases.
AS->addPointer(*this, Entry, Size);
return *AS;
- } else {
- if (New) *New = true;
- // Otherwise create a new alias set to hold the loaded pointer...
- AliasSets.push_back(new AliasSet());
- AliasSets.back().addPointer(*this, Entry, Size);
- return AliasSets.back();
}
+
+ if (New) *New = true;
+ // Otherwise create a new alias set to hold the loaded pointer.
+ AliasSets.push_back(new AliasSet());
+ AliasSets.back().addPointer(*this, Entry, Size);
+ return AliasSets.back();
}
bool AliasSetTracker::add(Value *Ptr, unsigned Size) {
@@ -305,28 +308,27 @@ bool AliasSetTracker::add(CallSite CS) {
return true; // doesn't alias anything
AliasSet *AS = findAliasSetForCallSite(CS);
- if (!AS) {
- AliasSets.push_back(new AliasSet());
- AS = &AliasSets.back();
- AS->addCallSite(CS, AA);
- return true;
- } else {
+ if (AS) {
AS->addCallSite(CS, AA);
return false;
}
+ AliasSets.push_back(new AliasSet());
+ AS = &AliasSets.back();
+ AS->addCallSite(CS, AA);
+ return true;
}
bool AliasSetTracker::add(Instruction *I) {
- // Dispatch to one of the other add methods...
+ // Dispatch to one of the other add methods.
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return add(LI);
- else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
return add(SI);
- else if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (CallInst *CI = dyn_cast<CallInst>(I))
return add(CI);
- else if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
return add(II);
- else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
return add(VAAI);
return true;
}
@@ -343,23 +345,23 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
// Loop over all of the alias sets in AST, adding the pointers contained
// therein into the current alias sets. This can cause alias sets to be
// merged together in the current AST.
- for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I)
- if (!I->Forward) { // Ignore forwarding alias sets
- AliasSet &AS = const_cast<AliasSet&>(*I);
-
- // If there are any call sites in the alias set, add them to this AST.
- for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
- add(AS.CallSites[i]);
-
- // Loop over all of the pointers in this alias set...
- AliasSet::iterator I = AS.begin(), E = AS.end();
- bool X;
- for (; I != E; ++I) {
- AliasSet &NewAS = addPointer(I.getPointer(), I.getSize(),
- (AliasSet::AccessType)AS.AccessTy, X);
- if (AS.isVolatile()) NewAS.setVolatile();
- }
+ for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) {
+ if (I->Forward) continue; // Ignore forwarding alias sets
+
+ AliasSet &AS = const_cast<AliasSet&>(*I);
+
+ // If there are any call sites in the alias set, add them to this AST.
+ for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
+ add(AS.CallSites[i]);
+
+ // Loop over all of the pointers in this alias set.
+ bool X;
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+ (AliasSet::AccessType)AS.AccessTy, X);
+ if (AS.isVolatile()) NewAS.setVolatile();
}
+ }
}
/// remove - Remove the specified (potentially non-empty) alias set from the
@@ -435,11 +437,11 @@ bool AliasSetTracker::remove(Instruction *I) {
// Dispatch to one of the other remove methods...
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return remove(LI);
- else if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
return remove(SI);
- else if (CallInst *CI = dyn_cast<CallInst>(I))
+ if (CallInst *CI = dyn_cast<CallInst>(I))
return remove(CI);
- else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+ if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
return remove(VAAI);
return true;
}
@@ -455,12 +457,17 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
AA.deleteValue(PtrVal);
// If this is a call instruction, remove the callsite from the appropriate
- // AliasSet.
- CallSite CS = CallSite::get(PtrVal);
- if (CS.getInstruction())
- if (!AA.doesNotAccessMemory(CS))
- if (AliasSet *AS = findAliasSetForCallSite(CS))
- AS->removeCallSite(CS);
+ // AliasSet (if present).
+ if (CallSite CS = PtrVal) {
+ if (!AA.doesNotAccessMemory(CS)) {
+ // Scan all the alias sets to see if this call site is contained.
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (I->Forward) continue;
+
+ I->removeCallSite(CS);
+ }
+ }
+ }
// First, look up the PointerRec for this pointer.
PointerMapType::iterator I = PointerMap.find(PtrVal);
@@ -510,7 +517,7 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
//===----------------------------------------------------------------------===//
void AliasSet::print(raw_ostream &OS) const {
- OS << " AliasSet[" << format("0x%p", (void*)this) << "," << RefCount << "] ";
+ OS << " AliasSet[" << (void*)this << ", " << RefCount << "] ";
OS << (AliasTy == MustAlias ? "must" : "may") << " alias, ";
switch (AccessTy) {
case NoModRef: OS << "No access "; break;
@@ -536,7 +543,7 @@ void AliasSet::print(raw_ostream &OS) const {
OS << "\n " << CallSites.size() << " Call Sites: ";
for (unsigned i = 0, e = CallSites.size(); i != e; ++i) {
if (i) OS << ", ";
- WriteAsOperand(OS, CallSites[i].getCalledValue());
+ WriteAsOperand(OS, CallSites[i]);
}
}
OS << "\n";
@@ -580,7 +587,7 @@ namespace {
AliasSetTracker *Tracker;
public:
static char ID; // Pass identification, replacement for typeid
- AliasSetPrinter() : FunctionPass(&ID) {}
+ AliasSetPrinter() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -600,5 +607,5 @@ namespace {
}
char AliasSetPrinter::ID = 0;
-static RegisterPass<AliasSetPrinter>
-X("print-alias-sets", "Alias Set Printer", false, true);
+INITIALIZE_PASS(AliasSetPrinter, "print-alias-sets",
+ "Alias Set Printer", false, true);
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 4f53a6d62559b..113c72b94dac5 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -18,6 +18,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
@@ -30,6 +31,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include <algorithm>
using namespace llvm;
@@ -137,8 +139,8 @@ namespace {
///
struct NoAA : public ImmutablePass, public AliasAnalysis {
static char ID; // Class identification, replacement for typeinfo
- NoAA() : ImmutablePass(&ID) {}
- explicit NoAA(void *PID) : ImmutablePass(PID) { }
+ NoAA() : ImmutablePass(ID) {}
+ explicit NoAA(char &PID) : ImmutablePass(PID) { }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
}
@@ -152,16 +154,20 @@ namespace {
return MayAlias;
}
- virtual void getArgumentAccesses(Function *F, CallSite CS,
- std::vector<PointerAccessInfo> &Info) {
- llvm_unreachable("This method may not be called on this function!");
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ return UnknownModRefBehavior;
+ }
+ virtual ModRefBehavior getModRefBehavior(const Function *F) {
+ return UnknownModRefBehavior;
}
virtual bool pointsToConstantMemory(const Value *P) { return false; }
- virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
return ModRef;
}
- virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
return ModRef;
}
@@ -169,11 +175,11 @@ namespace {
virtual void copyValue(Value *From, Value *To) {}
/// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it should
- /// override this to adjust the this pointer as needed for the specified pass
- /// info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -182,15 +188,279 @@ namespace {
// Register this pass...
char NoAA::ID = 0;
-static RegisterPass<NoAA>
-U("no-aa", "No Alias Analysis (always returns 'may' alias)", true, true);
-
-// Declare that we implement the AliasAnalysis interface
-static RegisterAnalysisGroup<AliasAnalysis> V(U);
+INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa",
+ "No Alias Analysis (always returns 'may' alias)",
+ true, true, false);
ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
//===----------------------------------------------------------------------===//
+// GetElementPtr Instruction Decomposition and Analysis
+//===----------------------------------------------------------------------===//
+
+namespace {
+ enum ExtensionKind {
+ EK_NotExtended,
+ EK_SignExt,
+ EK_ZeroExt
+ };
+
+ struct VariableGEPIndex {
+ const Value *V;
+ ExtensionKind Extension;
+ int64_t Scale;
+ };
+}
+
+
+/// GetLinearExpression - Analyze the specified value as a linear expression:
+/// "A*V + B", where A and B are constant integers. Return the scale and offset
+/// values as APInts and return V as a Value*, and return whether we looked
+/// through any sign or zero extends. The incoming Value is known to have
+/// IntegerType and it may already be sign or zero extended.
+///
+/// Note that this looks through extends, so the high bits may not be
+/// represented in the result.
+static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
+ ExtensionKind &Extension,
+ const TargetData &TD, unsigned Depth) {
+ assert(V->getType()->isIntegerTy() && "Not an integer value");
+
+ // Limit our recursion depth.
+ if (Depth == 6) {
+ Scale = 1;
+ Offset = 0;
+ return V;
+ }
+
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ switch (BOp->getOpcode()) {
+ default: break;
+ case Instruction::Or:
+ // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
+ // analyze it.
+ if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD))
+ break;
+ // FALL THROUGH.
+ case Instruction::Add:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset += RHSC->getValue();
+ return V;
+ case Instruction::Mul:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset *= RHSC->getValue();
+ Scale *= RHSC->getValue();
+ return V;
+ case Instruction::Shl:
+ V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension,
+ TD, Depth+1);
+ Offset <<= RHSC->getValue().getLimitedValue();
+ Scale <<= RHSC->getValue().getLimitedValue();
+ return V;
+ }
+ }
+ }
+
+ // Since GEP indices are sign extended anyway, we don't care about the high
+ // bits of a sign or zero extended value - just scales and offsets. The
+ // extensions have to be consistent though.
+ if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) ||
+ (isa<ZExtInst>(V) && Extension != EK_SignExt)) {
+ Value *CastOp = cast<CastInst>(V)->getOperand(0);
+ unsigned OldWidth = Scale.getBitWidth();
+ unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
+ Scale.trunc(SmallWidth);
+ Offset.trunc(SmallWidth);
+ Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt;
+
+ Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension,
+ TD, Depth+1);
+ Scale.zext(OldWidth);
+ Offset.zext(OldWidth);
+
+ return Result;
+ }
+
+ Scale = 1;
+ Offset = 0;
+ return V;
+}
+
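
GetLinearExpression above peels Add, Mul, Shl (and Or-as-Add) nodes off an integer expression, accumulating Scale and Offset so that the input equals Scale*V + Offset. The same recursion on an invented toy expression type, independent of the LLVM API:

#include <cassert>
#include <cstdint>

// Toy stand-in for an integer Value: a leaf variable, or op(sub, constant).
struct Expr {
  enum Kind { Leaf, Add, Mul, Shl } kind;
  const Expr *Sub;   // sub-expression; 0 for leaves
  int64_t RHS;       // constant operand of Add/Mul/Shl
};

// Decompose E as Scale*V + Offset, mirroring GetLinearExpression above.
static const Expr *getLinear(const Expr *E, int64_t &Scale, int64_t &Offset,
                             unsigned Depth) {
  if (Depth == 6 || E->kind == Expr::Leaf) { // same recursion cap as above
    Scale = 1;
    Offset = 0;
    return E;
  }
  const Expr *V = getLinear(E->Sub, Scale, Offset, Depth + 1);
  switch (E->kind) {
  case Expr::Add: Offset += E->RHS; break;                    // S*V+(O+C)
  case Expr::Mul: Offset *= E->RHS; Scale *= E->RHS; break;   // (S*C)*V+O*C
  case Expr::Shl: Offset <<= E->RHS; Scale <<= E->RHS; break; // shift both
  default: break;
  }
  return V;
}

int main() {
  // (x + 2) * 4 decomposes to Scale = 4, Offset = 8, V = x.
  Expr X    = { Expr::Leaf, 0, 0 };
  Expr Sum  = { Expr::Add, &X, 2 };
  Expr Prod = { Expr::Mul, &Sum, 4 };
  int64_t Scale, Offset;
  const Expr *V = getLinear(&Prod, Scale, Offset, 0);
  assert(V == &X && Scale == 4 && Offset == 8);
  return 0;
}
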
+/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
+/// into a base pointer with a constant offset and a number of scaled symbolic
+/// offsets.
+///
+/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
+/// the VarIndices vector) are Value*'s that are known to be scaled by the
+/// specified amount, but which may have other unrepresented high bits. As such,
+/// the gep cannot necessarily be reconstructed from its decomposed form.
+///
+/// When TargetData is around, this function is capable of analyzing everything
+/// that Value::getUnderlyingObject() can look through. When not, it just looks
+/// through pointer casts.
+///
+static const Value *
+DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
+ SmallVectorImpl<VariableGEPIndex> &VarIndices,
+ const TargetData *TD) {
+  // Limit recursion depth to keep compile time reasonable in crazy cases.
+ unsigned MaxLookup = 6;
+
+ BaseOffs = 0;
+ do {
+ // See if this is a bitcast or GEP.
+ const Operator *Op = dyn_cast<Operator>(V);
+ if (Op == 0) {
+      // The only non-operator cases we can handle are GlobalAliases.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (!GA->mayBeOverridden()) {
+ V = GA->getAliasee();
+ continue;
+ }
+ }
+ return V;
+ }
+
+ if (Op->getOpcode() == Instruction::BitCast) {
+ V = Op->getOperand(0);
+ continue;
+ }
+
+ const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
+ if (GEPOp == 0)
+ return V;
+
+ // Don't attempt to analyze GEPs over unsized objects.
+ if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
+ ->getElementType()->isSized())
+ return V;
+
+    // If we are lacking TargetData information, we can't compute the offsets of
+ // elements computed by GEPs. However, we can handle bitcast equivalent
+ // GEPs.
+ if (TD == 0) {
+ if (!GEPOp->hasAllZeroIndices())
+ return V;
+ V = GEPOp->getOperand(0);
+ continue;
+ }
+
+ // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
+ gep_type_iterator GTI = gep_type_begin(GEPOp);
+ for (User::const_op_iterator I = GEPOp->op_begin()+1,
+ E = GEPOp->op_end(); I != E; ++I) {
+ Value *Index = *I;
+ // Compute the (potentially symbolic) offset in bytes for this index.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ // For a struct, add the member offset.
+ unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+ if (FieldNo == 0) continue;
+
+ BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
+ continue;
+ }
+
+ // For an array/pointer, add the element offset, explicitly scaled.
+ if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
+ if (CIdx->isZero()) continue;
+ BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
+ continue;
+ }
+
+ uint64_t Scale = TD->getTypeAllocSize(*GTI);
+ ExtensionKind Extension = EK_NotExtended;
+
+ // If the integer type is smaller than the pointer size, it is implicitly
+ // sign extended to pointer size.
+ unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
+ if (TD->getPointerSizeInBits() > Width)
+ Extension = EK_SignExt;
+
+ // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
+ APInt IndexScale(Width, 0), IndexOffset(Width, 0);
+ Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
+ *TD, 0);
+
+ // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
+ // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
+ BaseOffs += IndexOffset.getZExtValue()*Scale;
+ Scale *= IndexScale.getZExtValue();
+
+
+    // If we already had an occurrence of this index variable, merge this
+ // scale into it. For example, we want to handle:
+ // A[x][x] -> x*16 + x*4 -> x*20
+ // This also ensures that 'x' only appears in the index list once.
+ for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
+ if (VarIndices[i].V == Index &&
+ VarIndices[i].Extension == Extension) {
+ Scale += VarIndices[i].Scale;
+ VarIndices.erase(VarIndices.begin()+i);
+ break;
+ }
+ }
+
+ // Make sure that we have a scale that makes sense for this target's
+ // pointer size.
+ if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+ Scale <<= ShiftBits;
+ Scale >>= ShiftBits;
+ }
+
+ if (Scale) {
+ VariableGEPIndex Entry = {Index, Extension, Scale};
+ VarIndices.push_back(Entry);
+ }
+ }
+
+ // Analyze the base pointer next.
+ V = GEPOp->getOperand(0);
+ } while (--MaxLookup);
+
+ // If the chain of expressions is too deep, just return early.
+ return V;
+}
+
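
Concretely, the index walk above turns each GEP operand into either a constant byte offset (struct fields and constant indices) or a scaled variable entry. A worked instance of the arithmetic, assuming a hypothetical layout with 4-byte ints and no padding:

#include <cassert>
#include <cstdint>

// For (hypothetical):
//   struct S { int32_t a; int32_t b[10]; };
//   %p = getelementptr %S* %base, i64 1, i32 1, i64 %i
// With 4-byte ints: sizeof(S) == 44 and offsetof(S, b) == 4.
int main() {
  int64_t BaseOffs = 0;
  int64_t VarScale = 0;   // scale attached to the variable index %i

  BaseOffs += 1 * 44;     // array-style first index: 1 * sizeof(S)
  BaseOffs += 4;          // struct field 1: offsetof(S, b)
  VarScale  = 4;          // %i indexes int32_t elements: scale 4

  // Decomposed form: %p = %base + 48 + 4*%i.
  assert(BaseOffs == 48 && VarScale == 4);
  return 0;
}
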
+/// GetIndexDifference - Dest and Src are the variable indices from two
+/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
+/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
+/// difference between the two pointers.
+static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
+ const SmallVectorImpl<VariableGEPIndex> &Src) {
+ if (Src.empty()) return;
+
+ for (unsigned i = 0, e = Src.size(); i != e; ++i) {
+ const Value *V = Src[i].V;
+ ExtensionKind Extension = Src[i].Extension;
+ int64_t Scale = Src[i].Scale;
+
+ // Find V in Dest. This is N^2, but pointer indices almost never have more
+    // than a few variable indices.
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
+ if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
+
+ // If we found it, subtract off Scale V's from the entry in Dest. If it
+ // goes to zero, remove the entry.
+ if (Dest[j].Scale != Scale)
+ Dest[j].Scale -= Scale;
+ else
+ Dest.erase(Dest.begin()+j);
+ Scale = 0;
+ break;
+ }
+
+ // If we didn't consume this entry, add it to the end of the Dest list.
+ if (Scale) {
+ VariableGEPIndex Entry = { V, Extension, -Scale };
+ Dest.push_back(Entry);
+ }
+ }
+}
+
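
GetIndexDifference is plain vector subtraction over (variable, scale) entries: matched entries cancel, and unmatched Src entries are appended negated. A standalone sketch over std::vector (the extension kind is dropped here for brevity):

#include <cassert>
#include <cstdint>
#include <vector>

struct Idx { const void *V; int64_t Scale; };  // simplified VariableGEPIndex

// Mirror of GetIndexDifference above: Dest -= Src, entry by entry.
static void indexDifference(std::vector<Idx> &Dest,
                            const std::vector<Idx> &Src) {
  for (unsigned i = 0, e = Src.size(); i != e; ++i) {
    const void *V = Src[i].V;
    int64_t Scale = Src[i].Scale;
    for (unsigned j = 0, je = Dest.size(); j != je; ++j) {
      if (Dest[j].V != V) continue;
      if (Dest[j].Scale != Scale)
        Dest[j].Scale -= Scale;        // partial cancellation
      else
        Dest.erase(Dest.begin() + j);  // exact cancellation: drop the entry
      Scale = 0;
      break;
    }
    if (Scale) {
      Idx E = { V, -Scale };           // unmatched Src entry appears negated
      Dest.push_back(E);
    }
  }
}

int main() {
  int x, y;
  std::vector<Idx> D, S;
  Idx d0 = { &x, 20 }; D.push_back(d0);  // GEP1: 20*x
  Idx s0 = { &x, 4 };  S.push_back(s0);  // GEP2: 4*x
  Idx s1 = { &y, 8 };  S.push_back(s1);  //       + 8*y
  indexDifference(D, S);
  // Result: 16*x - 8*y.
  assert(D.size() == 2 && D[0].Scale == 16 && D[1].Scale == -8);
  return 0;
}
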
+//===----------------------------------------------------------------------===//
// BasicAliasAnalysis Pass
//===----------------------------------------------------------------------===//
@@ -220,10 +490,10 @@ namespace {
/// derives from the NoAA class.
struct BasicAliasAnalysis : public NoAA {
static char ID; // Class identification, replacement for typeinfo
- BasicAliasAnalysis() : NoAA(&ID) {}
+ BasicAliasAnalysis() : NoAA(ID) {}
- AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
+ virtual AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size) {
assert(Visited.empty() && "Visited must be cleared after use!");
assert(notDifferentParent(V1, V2) &&
"BasicAliasAnalysis doesn't support interprocedural queries.");
@@ -232,19 +502,33 @@ namespace {
return Alias;
}
- ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
- ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size);
+
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+      // The AliasAnalysis base class has some smarts, let's use them.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+ }
/// pointsToConstantMemory - Chase pointers until we find a (constant
/// global) or not.
- bool pointsToConstantMemory(const Value *P);
+ virtual bool pointsToConstantMemory(const Value *P);
+
+ /// getModRefBehavior - Return the behavior when calling the given
+ /// call site.
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+ /// getModRefBehavior - Return the behavior when calling the given function.
+ /// For use when the call site is not known.
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
/// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it should
- /// override this to adjust the this pointer as needed for the specified pass
- /// info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) {
+ if (ID == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -275,11 +559,9 @@ namespace {
// Register this pass...
char BasicAliasAnalysis::ID = 0;
-static RegisterPass<BasicAliasAnalysis>
-X("basicaa", "Basic Alias Analysis (default AA impl)", false, true);
-
-// Declare that we implement the AliasAnalysis interface
-static RegisterAnalysisGroup<AliasAnalysis, true> Y(X);
+INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa",
+ "Basic Alias Analysis (default AA impl)",
+ false, true, true);
ImmutablePass *llvm::createBasicAliasAnalysisPass() {
return new BasicAliasAnalysis();
@@ -295,16 +577,50 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
// global to be marked constant in some modules and non-constant in others.
// GV may even be a declaration, not a definition.
return GV->isConstant();
- return false;
+
+ return NoAA::pointsToConstantMemory(P);
}
+/// getModRefBehavior - Return the behavior when calling the given call site.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ if (CS.doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+
+ ModRefBehavior Min = UnknownModRefBehavior;
+
+ // If the callsite knows it only reads memory, don't return worse
+ // than that.
+ if (CS.onlyReadsMemory())
+ Min = OnlyReadsMemory;
+
+  // The AliasAnalysis base class has some smarts, let's use them.
+ return std::min(AliasAnalysis::getModRefBehavior(CS), Min);
+}
+
+/// getModRefBehavior - Return the behavior when calling the given function.
+/// For use when the call site is not known.
+AliasAnalysis::ModRefBehavior
+BasicAliasAnalysis::getModRefBehavior(const Function *F) {
+ if (F->doesNotAccessMemory())
+ // Can't do better than this.
+ return DoesNotAccessMemory;
+ if (F->onlyReadsMemory())
+ return OnlyReadsMemory;
+ if (unsigned id = F->getIntrinsicID())
+ return getIntrinsicModRefBehavior(id);
+
+ return NoAA::getModRefBehavior(F);
+}
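
Both getModRefBehavior overloads combine facts with std::min, which is sound only because ModRefBehavior is ordered from most precise (DoesNotAccessMemory) to least (UnknownModRefBehavior). A compilable illustration with stand-in values; the real enum has more intermediate states:

#include <algorithm>
#include <cassert>

// Assumed ordering: smaller means more precise, as the std::min trick needs.
enum ModRefBehavior {
  DoesNotAccessMemory   = 0,
  OnlyReadsMemory       = 1,
  UnknownModRefBehavior = 2
};

int main() {
  // A readonly call site caps the base class's unknown answer; min() keeps
  // the most precise fact available.
  ModRefBehavior Min = OnlyReadsMemory;
  ModRefBehavior FromBase = UnknownModRefBehavior;
  assert(std::min(FromBase, Min) == OnlyReadsMemory);
  return 0;
}
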
/// getModRefInfo - Check to see if the specified callsite can clobber the
/// specified memory object. Since we only look at local properties of this
/// function, we really can't say much about this query. We do, however, use
/// simple "address taken" analysis on local objects.
AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
assert(notDifferentParent(CS.getInstruction(), P) &&
"AliasAnalysis query involving multiple functions!");
@@ -316,7 +632,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
// the current function not to the current function, and a tail callee
// may reference them.
if (isa<AllocaInst>(Object))
- if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
+ if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
if (CI->isTailCall())
return NoModRef;
@@ -327,7 +643,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
isNonEscapingLocalObject(Object)) {
bool PassedAsArg = false;
unsigned ArgNo = 0;
- for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
+ for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture pointer arguments.
if (!(*CI)->getType()->isPointerTy() ||
@@ -338,7 +654,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
// is impossible to alias the pointer we're checking. If not, we have to
// assume that the call could touch the pointer, even though it doesn't
// escape.
- if (!isNoAlias(cast<Value>(CI), ~0U, P, ~0U)) {
+ if (!isNoAlias(cast<Value>(CI), UnknownSize, P, UnknownSize)) {
PassedAsArg = true;
break;
}
@@ -349,127 +665,76 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
}
// Finally, handle specific knowledge of intrinsics.
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
- if (II == 0)
- return AliasAnalysis::getModRefInfo(CS, P, Size);
-
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::memcpy:
- case Intrinsic::memmove: {
- unsigned Len = ~0U;
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
- Len = LenCI->getZExtValue();
- Value *Dest = II->getArgOperand(0);
- Value *Src = II->getArgOperand(1);
- if (isNoAlias(Dest, Len, P, Size)) {
- if (isNoAlias(Src, Len, P, Size))
- return NoModRef;
- return Ref;
- }
- break;
- }
- case Intrinsic::memset:
- // Since memset is 'accesses arguments' only, the AliasAnalysis base class
- // will handle it for the variable length case.
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
- unsigned Len = LenCI->getZExtValue();
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+ if (II != 0)
+ switch (II->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove: {
+ unsigned Len = UnknownSize;
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+ Len = LenCI->getZExtValue();
Value *Dest = II->getArgOperand(0);
- if (isNoAlias(Dest, Len, P, Size))
+ Value *Src = II->getArgOperand(1);
+ if (isNoAlias(Dest, Len, P, Size)) {
+ if (isNoAlias(Src, Len, P, Size))
+ return NoModRef;
+ return Ref;
+ }
+ break;
+ }
+ case Intrinsic::memset:
+ // Since memset is 'accesses arguments' only, the AliasAnalysis base class
+ // will handle it for the variable length case.
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ unsigned Len = LenCI->getZExtValue();
+ Value *Dest = II->getArgOperand(0);
+ if (isNoAlias(Dest, Len, P, Size))
+ return NoModRef;
+ }
+ break;
+ case Intrinsic::atomic_cmp_swap:
+ case Intrinsic::atomic_swap:
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin:
+ if (TD) {
+ Value *Op1 = II->getArgOperand(0);
+ unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
+ if (isNoAlias(Op1, Op1Size, P, Size))
+ return NoModRef;
+ }
+ break;
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start: {
+ unsigned PtrSize =
+ cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size))
return NoModRef;
+ break;
}
- break;
- case Intrinsic::atomic_cmp_swap:
- case Intrinsic::atomic_swap:
- case Intrinsic::atomic_load_add:
- case Intrinsic::atomic_load_sub:
- case Intrinsic::atomic_load_and:
- case Intrinsic::atomic_load_nand:
- case Intrinsic::atomic_load_or:
- case Intrinsic::atomic_load_xor:
- case Intrinsic::atomic_load_max:
- case Intrinsic::atomic_load_min:
- case Intrinsic::atomic_load_umax:
- case Intrinsic::atomic_load_umin:
- if (TD) {
- Value *Op1 = II->getArgOperand(0);
- unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
- if (isNoAlias(Op1, Op1Size, P, Size))
+ case Intrinsic::invariant_end: {
+ unsigned PtrSize =
+ cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+ if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size))
return NoModRef;
+ break;
+ }
}
- break;
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start: {
- unsigned PtrSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
- if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size))
- return NoModRef;
- break;
- }
- case Intrinsic::invariant_end: {
- unsigned PtrSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
- if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size))
- return NoModRef;
- break;
- }
- }
   // The AliasAnalysis base class has some smarts, let's use them.
return AliasAnalysis::getModRefInfo(CS, P, Size);
}
-AliasAnalysis::ModRefResult
-BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) {
- // If CS1 or CS2 are readnone, they don't interact.
- ModRefBehavior CS1B = AliasAnalysis::getModRefBehavior(CS1);
- if (CS1B == DoesNotAccessMemory) return NoModRef;
-
- ModRefBehavior CS2B = AliasAnalysis::getModRefBehavior(CS2);
- if (CS2B == DoesNotAccessMemory) return NoModRef;
-
- // If they both only read from memory, just return ref.
- if (CS1B == OnlyReadsMemory && CS2B == OnlyReadsMemory)
- return Ref;
-
- // Otherwise, fall back to NoAA (mod+ref).
- return NoAA::getModRefInfo(CS1, CS2);
-}
-
-/// GetIndiceDifference - Dest and Src are the variable indices from two
-/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
-/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
-static void GetIndiceDifference(
- SmallVectorImpl<std::pair<const Value*, int64_t> > &Dest,
- const SmallVectorImpl<std::pair<const Value*, int64_t> > &Src) {
- if (Src.empty()) return;
-
- for (unsigned i = 0, e = Src.size(); i != e; ++i) {
- const Value *V = Src[i].first;
- int64_t Scale = Src[i].second;
-
- // Find V in Dest. This is N^2, but pointer indices almost never have more
- // than a few variable indexes.
- for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
- if (Dest[j].first != V) continue;
-
- // If we found it, subtract off Scale V's from the entry in Dest. If it
- // goes to zero, remove the entry.
- if (Dest[j].second != Scale)
- Dest[j].second -= Scale;
- else
- Dest.erase(Dest.begin()+j);
- Scale = 0;
- break;
- }
-
- // If we didn't consume this entry, add it to the end of the Dest list.
- if (Scale)
- Dest.push_back(std::make_pair(V, -Scale));
- }
-}
-
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
/// anything about V2. UnderlyingV1 is GEP1->getUnderlyingObject(),
@@ -488,13 +753,14 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
return MayAlias;
int64_t GEP1BaseOffset;
- SmallVector<std::pair<const Value*, int64_t>, 4> GEP1VariableIndices;
+ SmallVector<VariableGEPIndex, 4> GEP1VariableIndices;
// If we have two gep instructions with must-alias'ing base pointers, figure
// out if the indexes to the GEP tell us anything about the derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
// Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(UnderlyingV1, ~0U, UnderlyingV2, ~0U);
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize,
+ UnderlyingV2, UnknownSize);
// If we get a No or May, then return it immediately, no amount of analysis
// will improve this situation.
@@ -507,7 +773,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
int64_t GEP2BaseOffset;
- SmallVector<std::pair<const Value*, int64_t>, 4> GEP2VariableIndices;
+ SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
const Value *GEP2BasePtr =
DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
@@ -523,7 +789,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// Subtract the GEP2 pointer from the GEP1 pointer to find out their
// symbolic difference.
GEP1BaseOffset -= GEP2BaseOffset;
- GetIndiceDifference(GEP1VariableIndices, GEP2VariableIndices);
+ GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
} else {
// Check to see if these two pointers are related by the getelementptr
@@ -531,10 +797,10 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// pointer, we know they cannot alias.
// If both accesses are unknown size, we can't do anything useful here.
- if (V1Size == ~0U && V2Size == ~0U)
+ if (V1Size == UnknownSize && V2Size == UnknownSize)
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, ~0U, V2, V2Size);
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, V2, V2Size);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -578,8 +844,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
// provides an offset of 4 bytes (assuming a <= 4 byte access).
for (unsigned i = 0, e = GEP1VariableIndices.size();
i != e && GEP1BaseOffset;++i)
- if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].second)
- GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].second;
+ if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale)
+ GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale;
// If our known offset is bigger than the access size, we know we don't have
   // an alias.
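
The loop above folds whole multiples of each variable's scale into that variable, shrinking the residual constant offset before it is compared against the access sizes. A worked instance with invented numbers:

#include <cassert>
#include <cstdint>

int main() {
  // Pointer difference: 4*x + 22 bytes, one variable index of scale 4.
  int64_t GEP1BaseOffset = 22;
  int64_t Scale = 4;

  // Same computation as the loop above, for a single index.
  if (int64_t RemovedOffset = GEP1BaseOffset / Scale)
    GEP1BaseOffset -= RemovedOffset * Scale;

  // 20 of the 22 bytes are absorbed into the x*4 term; 2 remain, so
  // accesses of size <= 2 at the two pointers cannot overlap.
  assert(GEP1BaseOffset == 2);
  return 0;
}
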
@@ -782,8 +1048,8 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
if (TD)
- if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, *TD)) ||
- (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD)))
+ if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) ||
+ (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
return NoAlias;
// FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
@@ -810,7 +1076,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1))
return aliasSelect(S1, V1Size, V2, V2Size);
- return MayAlias;
+ return NoAA::alias(V1, V1Size, V2, V2Size);
}
// Make sure that anything that uses AliasAnalysis pulls in this file.
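
The last hunk above replaces a hard-coded MayAlias result with a call to NoAA::alias, so an unanswered query falls through to the next analysis in the chain instead of bottoming out locally. The shape of that pattern, with invented class names:

#include <cassert>

enum AliasResult { NoAlias, MayAlias, MustAlias };

// Invented stand-ins for the AA chaining idiom.
struct ChainedAA {
  virtual ~ChainedAA() {}
  virtual AliasResult alias() { return MayAlias; }  // end of the chain
};

struct LocalAA : ChainedAA {
  virtual AliasResult alias() {
    // ... local disambiguation rules would go here ...
    return ChainedAA::alias();  // no local answer: ask the next analysis
  }
};

int main() {
  LocalAA AA;
  assert(AA.alias() == MayAlias);
  return 0;
}
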
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index e06704bd897ca..617a362062fc2 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -25,7 +25,7 @@ using namespace llvm;
namespace {
struct CFGViewer : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
- CFGViewer() : FunctionPass(&ID) {}
+ CFGViewer() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
F.viewCFG();
@@ -41,13 +41,12 @@ namespace {
}
char CFGViewer::ID = 0;
-static RegisterPass<CFGViewer>
-V0("view-cfg", "View CFG of function", false, true);
+INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true);
namespace {
struct CFGOnlyViewer : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
- CFGOnlyViewer() : FunctionPass(&ID) {}
+ CFGOnlyViewer() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
F.viewCFGOnly();
@@ -63,15 +62,14 @@ namespace {
}
char CFGOnlyViewer::ID = 0;
-static RegisterPass<CFGOnlyViewer>
-V1("view-cfg-only",
- "View CFG of function (with no function bodies)", false, true);
+INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+ "View CFG of function (with no function bodies)", false, true);
namespace {
struct CFGPrinter : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGPrinter() : FunctionPass(&ID) {}
- explicit CFGPrinter(void *pid) : FunctionPass(pid) {}
+ CFGPrinter() : FunctionPass(ID) {}
+ explicit CFGPrinter(char &pid) : FunctionPass(pid) {}
virtual bool runOnFunction(Function &F) {
std::string Filename = "cfg." + F.getNameStr() + ".dot";
@@ -97,14 +95,14 @@ namespace {
}
char CFGPrinter::ID = 0;
-static RegisterPass<CFGPrinter>
-P1("dot-cfg", "Print CFG of function to 'dot' file", false, true);
+INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+ false, true);
namespace {
struct CFGOnlyPrinter : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGOnlyPrinter() : FunctionPass(&ID) {}
- explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {}
+ CFGOnlyPrinter() : FunctionPass(ID) {}
+ explicit CFGOnlyPrinter(char &pid) : FunctionPass(pid) {}
virtual bool runOnFunction(Function &F) {
std::string Filename = "cfg." + F.getNameStr() + ".dot";
errs() << "Writing '" << Filename << "'...";
@@ -128,9 +126,9 @@ namespace {
}
char CFGOnlyPrinter::ID = 0;
-static RegisterPass<CFGOnlyPrinter>
-P2("dot-cfg-only",
- "Print CFG of function to 'dot' file (with no function bodies)", false, true);
+INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+ "Print CFG of function to 'dot' file (with no function bodies)",
+ false, true);
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index d9b670dea58d2..6a2ab681d1acf 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -38,12 +38,15 @@ add_llvm_library(LLVMAnalysis
ProfileInfoLoader.cpp
ProfileInfoLoaderPass.cpp
ProfileVerifierPass.cpp
+ RegionInfo.cpp
+ RegionPrinter.cpp
ScalarEvolution.cpp
ScalarEvolutionAliasAnalysis.cpp
ScalarEvolutionExpander.cpp
ScalarEvolutionNormalization.cpp
SparsePropagation.cpp
Trace.cpp
+ TypeBasedAliasAnalysis.cpp
ValueTracking.cpp
)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 047825884ef35..90eae20858fb9 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -69,7 +69,7 @@ bool llvm::PointerMayBeCaptured(const Value *V,
switch (I->getOpcode()) {
case Instruction::Call:
case Instruction::Invoke: {
- CallSite CS = CallSite::get(I);
+ CallSite CS(I);
// Not captured if the callee is readonly, doesn't return a copy through
// its return value and doesn't unwind (a readonly function can leak bits
// by throwing an exception or not depending on the input value).
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 13d8f4de48248..0bf7967e83b13 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -778,9 +778,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
case Instruction::ICmp:
case Instruction::FCmp: assert(0 && "Invalid for compares");
case Instruction::Call:
- if (Function *F = dyn_cast<Function>(Ops[CallInst::ArgOffset ? 0:NumOps-1]))
+ if (Function *F = dyn_cast<Function>(Ops[NumOps - 1]))
if (canConstantFoldCallTo(F))
- return ConstantFoldCall(F, Ops+CallInst::ArgOffset, NumOps-1);
+ return ConstantFoldCall(F, Ops, NumOps - 1);
return 0;
case Instruction::PtrToInt:
     // If the input is an inttoptr, eliminate the pair. This requires knowing
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index 3532b052dc558..0567750606104 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -40,7 +40,7 @@ namespace {
void printVariableDeclaration(const Value *V);
public:
static char ID; // Pass identification
- PrintDbgInfo() : FunctionPass(&ID), Out(outs()) {}
+ PrintDbgInfo() : FunctionPass(ID), Out(errs()) {}
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -48,8 +48,8 @@ namespace {
}
};
char PrintDbgInfo::ID = 0;
- static RegisterPass<PrintDbgInfo> X("print-dbginfo",
- "Print debug info in human readable form");
+ INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
+ "Print debug info in human readable form", false, false);
}
FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index c8d0d22ec2e10..5ca89c658df6f 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetMachine.h" // FIXME: LAYERING VIOLATION!
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Intrinsics.h"
@@ -22,6 +21,8 @@
#include "llvm/Module.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,7 +33,22 @@ using namespace llvm::dwarf;
// DIDescriptor
//===----------------------------------------------------------------------===//
-StringRef
+DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
+}
+
+StringRef
DIDescriptor::getStringField(unsigned Elt) const {
if (DbgNode == 0)
return StringRef();
@@ -60,7 +76,8 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
return DIDescriptor();
if (Elt < DbgNode->getNumOperands())
- return DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
+ return
+ DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
return DIDescriptor();
}
@@ -73,6 +90,15 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
return 0;
}
+Constant *DIDescriptor::getConstantField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
Function *DIDescriptor::getFunctionField(unsigned Elt) const {
if (DbgNode == 0)
return 0;
@@ -109,6 +135,7 @@ bool DIDescriptor::isDerivedType() const {
case dwarf::DW_TAG_restrict_type:
case dwarf::DW_TAG_member:
case dwarf::DW_TAG_inheritance:
+ case dwarf::DW_TAG_friend:
return true;
default:
// CompositeTypes are currently modelled as DerivedTypes.
@@ -161,7 +188,8 @@ bool DIDescriptor::isSubprogram() const {
/// isGlobalVariable - Return true if the specified tag is legal for
/// DIGlobalVariable.
bool DIDescriptor::isGlobalVariable() const {
- return DbgNode && getTag() == dwarf::DW_TAG_variable;
+ return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
+ getTag() == dwarf::DW_TAG_constant);
}
/// isGlobal - Return true if the specified tag is legal for DIGlobal.
@@ -233,9 +261,8 @@ unsigned DIArray::getNumElements() const {
}
/// replaceAllUsesWith - Replace all uses of debug info referenced by
-/// this descriptor. After this completes, the current debug info value
-/// is erased.
-void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) {
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
if (!DbgNode)
return;
@@ -249,7 +276,7 @@ void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) {
const MDNode *DN = D;
const Value *V = cast_or_null<Value>(DN);
Node->replaceAllUsesWith(const_cast<Value*>(V));
- Node->destroy();
+ MDNode::deleteTemporary(Node);
}
}
@@ -277,6 +304,16 @@ bool DIType::Verify() const {
return true;
}
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+ return isBasicType();
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+ return isDerivedType();
+}
+
/// Verify - Verify that a composite type descriptor is well formed.
bool DICompositeType::Verify() const {
if (!DbgNode)
@@ -327,7 +364,7 @@ bool DIGlobalVariable::Verify() const {
if (!Ty.Verify())
return false;
- if (!getGlobal())
+ if (!getGlobal() && !getConstant())
return false;
return true;
@@ -355,7 +392,7 @@ bool DIVariable::Verify() const {
bool DILocation::Verify() const {
if (!DbgNode)
return false;
-
+
return DbgNode->getNumOperands() == 4;
}
@@ -378,7 +415,7 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
DIType BaseType = getTypeDerivedFrom();
- // If this type is not derived from any type then take conservative
+ // If this type is not derived from any type then take conservative
// approach.
if (!BaseType.isValid())
return getSizeInBits();
@@ -387,17 +424,17 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
else
return BaseType.getSizeInBits();
}
-
+
return getSizeInBits();
}
-/// isInlinedFnArgument - Return trule if this variable provides debugging
+/// isInlinedFnArgument - Return true if this variable provides debugging
 /// information for an inlined function argument.
bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
assert(CurFn && "Invalid function");
if (!getContext().isSubprogram())
return false;
- // This variable is not inlined function argument if its scope
+  // This variable is not an inlined function argument if its scope
// does not describe current function.
return !(DISubprogram(getContext()).describes(CurFn));
}
@@ -416,7 +453,7 @@ bool DISubprogram::describes(const Function *F) {
return false;
}
-unsigned DISubprogram::isOptimized() const {
+unsigned DISubprogram::isOptimized() const {
assert (DbgNode && "Invalid subprogram descriptor!");
if (DbgNode->getNumOperands() == 16)
return getUnsignedField(15);
@@ -426,7 +463,7 @@ unsigned DISubprogram::isOptimized() const {
StringRef DIScope::getFilename() const {
if (!DbgNode)
return StringRef();
- if (isLexicalBlock())
+ if (isLexicalBlock())
return DILexicalBlock(DbgNode).getFilename();
if (isSubprogram())
return DISubprogram(DbgNode).getFilename();
@@ -445,7 +482,7 @@ StringRef DIScope::getFilename() const {
StringRef DIScope::getDirectory() const {
if (!DbgNode)
return StringRef();
- if (isLexicalBlock())
+ if (isLexicalBlock())
return DILexicalBlock(DbgNode).getDirectory();
if (isSubprogram())
return DISubprogram(DbgNode).getDirectory();
@@ -899,7 +936,26 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
ContainingType
};
- return DICompositeType(MDNode::get(VMContext, &Elts[0], 13));
+
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], 13);
+ // Create a named metadata so that we do not lose this enum info.
+ if (Tag == dwarf::DW_TAG_enumeration_type) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+ NMD->addOperand(Node);
+ }
+ return DICompositeType(Node);
+}
+
+
+/// CreateTemporaryType - Create a temporary forward-declared type.
+DIType DIFactory::CreateTemporaryType() {
+ // Give the temporary MDNode a tag. It doesn't matter what tag we
+ // use here as long as DIType accepts it.
+ Value *Elts[] = {
+ GetTagConstant(DW_TAG_base_type)
+ };
+ MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+ return DIType(Node);
}
@@ -915,8 +971,8 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
unsigned Flags,
DIType DerivedFrom,
DIArray Elements,
- unsigned RuntimeLang) {
-
+ unsigned RuntimeLang,
+ MDNode *ContainingType) {
Value *Elts[] = {
GetTagConstant(Tag),
Context,
@@ -929,9 +985,16 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
DerivedFrom,
Elements,
- ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
+ ContainingType
};
- return DICompositeType(MDNode::get(VMContext, &Elts[0], 12));
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], 13);
+ // Create a named metadata so that we do not lose this enum info.
+ if (Tag == dwarf::DW_TAG_enumeration_type) {
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+ NMD->addOperand(Node);
+ }
+ return DICompositeType(Node);
}
@@ -980,8 +1043,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
}
/// CreateSubprogramDefinition - Create new subprogram descriptor for the
-/// given declaration.
-DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) {
+/// given declaration.
+DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration){
if (SPDeclaration.isDefinition())
return DISubprogram(SPDeclaration);
@@ -1046,6 +1109,38 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
return DIGlobalVariable(Node);
}
+/// CreateGlobalVariable - Create a new descriptor for the specified constant.
+DIGlobalVariable
+DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
+ StringRef DisplayName,
+ StringRef LinkageName,
+ DIFile F,
+ unsigned LineNo, DIType Ty,bool isLocalToUnit,
+ bool isDefinition, llvm::Constant *Val) {
+ Value *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_variable),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ Context,
+ MDString::get(VMContext, Name),
+ MDString::get(VMContext, DisplayName),
+ MDString::get(VMContext, LinkageName),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ Ty,
+ ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ Val
+ };
+
+ Value *const *Vs = &Elts[0];
+  MDNode *Node = MDNode::get(VMContext, Vs, 12);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(Node);
+
+ return DIGlobalVariable(Node);
+}
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
@@ -1073,10 +1168,10 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
char One = '\1';
if (FName.startswith(StringRef(&One, 1)))
FName = FName.substr(1);
- NamedMDNode *FnLocals = M.getNamedMetadata(Twine("llvm.dbg.lv.", FName));
- if (!FnLocals)
- FnLocals = NamedMDNode::Create(VMContext, Twine("llvm.dbg.lv.", FName),
- NULL, 0, &M);
+
+ SmallString<32> Out;
+ NamedMDNode *FnLocals =
+ M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FName).toStringRef(Out));
FnLocals->addOperand(Node);
}
return DIVariable(Node);
@@ -1089,7 +1184,7 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
const std::string &Name,
DIFile F,
unsigned LineNo,
- DIType Ty,
+ DIType Ty,
SmallVector<Value *, 9> &addr) {
SmallVector<Value *, 9> Elts;
Elts.push_back(GetTagConstant(Tag));
@@ -1107,14 +1202,19 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
/// CreateBlock - This creates a descriptor for a lexical block with the
/// specified parent VMContext.
DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context,
- unsigned LineNo, unsigned Col) {
+ DIFile F, unsigned LineNo,
+ unsigned Col) {
+  // Defeat MDNode uniquing for lexical blocks.
+ static unsigned int unique_id = 0;
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_lexical_block),
Context,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(Type::getInt32Ty(VMContext), Col)
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+ F,
+ ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
};
- return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 4));
+ return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 6));
}
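
The unique_id operand exists because MDNodes are uniqued by content: two lexical blocks opened at the same line and column would otherwise collapse into one node. The same effect, shown with a content-keyed std::set:

#include <cassert>
#include <set>
#include <utility>

int main() {
  // Content-addressed nodes collapse: two blocks at the same line/column
  // become one entry.
  std::set<std::pair<int, int> > Uniqued;
  Uniqued.insert(std::make_pair(10, 4));  // block A at line 10, col 4
  Uniqued.insert(std::make_pair(10, 4));  // block B, same position
  assert(Uniqued.size() == 1);            // collapsed

  // Adding a unique counter operand keeps them distinct, as above.
  unsigned NextID = 0;
  std::set<std::pair<std::pair<int, int>, unsigned> > WithID;
  WithID.insert(std::make_pair(std::make_pair(10, 4), NextID++));
  WithID.insert(std::make_pair(std::make_pair(10, 4), NextID++));
  assert(WithID.size() == 2);             // distinct lexical blocks
  return 0;
}
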
/// CreateNameSpace - This creates new descriptor for a namespace
@@ -1174,7 +1274,7 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
// If this block already has a terminator then insert this intrinsic
// before the terminator.
- if (TerminatorInst *T = InsertAtEnd->getTerminator())
+ if (TerminatorInst *T = InsertAtEnd->getTerminator())
return CallInst::Create(DeclareFn, Args, Args+2, "", T);
else
return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);}
@@ -1203,7 +1303,7 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
- Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+ Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
D };
return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
@@ -1221,21 +1321,21 @@ void DebugInfoFinder::processModule(Module &M) {
++BI) {
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
processDeclare(DDI);
-
+
DebugLoc Loc = BI->getDebugLoc();
if (Loc.isUnknown())
continue;
-
+
LLVMContext &Ctx = BI->getContext();
DIDescriptor Scope(Loc.getScope(Ctx));
-
+
if (Scope.isCompileUnit())
addCompileUnit(DICompileUnit(Scope));
else if (Scope.isSubprogram())
processSubprogram(DISubprogram(Scope));
else if (Scope.isLexicalBlock())
processLexicalBlock(DILexicalBlock(Scope));
-
+
if (MDNode *IA = Loc.getInlinedAt(Ctx))
processLocation(DILocation(IA));
}
@@ -1380,7 +1480,7 @@ static Value *findDbgGlobalDeclare(GlobalVariable *V) {
return 0;
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIDescriptor DIG(cast_or_null<MDNode>(NMD->getOperand(i)));
+ DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
if (!DIG.isGlobalVariable())
continue;
if (DIGlobalVariable(DIG).getGlobal() == V)
@@ -1393,16 +1493,16 @@ static Value *findDbgGlobalDeclare(GlobalVariable *V) {
/// It looks through pointer casts too.
static const DbgDeclareInst *findDbgDeclare(const Value *V) {
V = V->stripPointerCasts();
-
+
if (!isa<Instruction>(V) && !isa<Argument>(V))
return 0;
-
+
const Function *F = NULL;
if (const Instruction *I = dyn_cast<Instruction>(V))
F = I->getParent()->getParent();
else if (const Argument *A = dyn_cast<Argument>(V))
F = A->getParent();
-
+
for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
BI != BE; ++BI)
@@ -1460,10 +1560,10 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
DIDescriptor D(Scope);
if (D.isSubprogram())
return DISubprogram(Scope);
-
+
if (D.isLexicalBlock())
return getDISubprogram(DILexicalBlock(Scope).getContext());
-
+
return DISubprogram();
}
@@ -1471,9 +1571,9 @@ DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
DICompositeType llvm::getDICompositeType(DIType T) {
if (T.isCompositeType())
return DICompositeType(T);
-
+
if (T.isDerivedType())
return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
-
+
return DICompositeType();
}
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index d95c3761bee62..9f340942f2ccb 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -86,99 +86,100 @@ namespace {
struct DomViewer
: public DOTGraphTraitsViewer<DominatorTree, false> {
static char ID;
- DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", &ID){}
+ DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){}
};
struct DomOnlyViewer
: public DOTGraphTraitsViewer<DominatorTree, true> {
static char ID;
- DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", &ID){}
+ DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){}
};
struct PostDomViewer
: public DOTGraphTraitsViewer<PostDominatorTree, false> {
static char ID;
PostDomViewer() :
- DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", &ID){}
+ DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){}
};
struct PostDomOnlyViewer
: public DOTGraphTraitsViewer<PostDominatorTree, true> {
static char ID;
PostDomOnlyViewer() :
- DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", &ID){}
+ DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){}
};
} // end anonymous namespace
char DomViewer::ID = 0;
-RegisterPass<DomViewer> A("view-dom",
- "View dominance tree of function");
+INITIALIZE_PASS(DomViewer, "view-dom",
+ "View dominance tree of function", false, false);
char DomOnlyViewer::ID = 0;
-RegisterPass<DomOnlyViewer> B("view-dom-only",
- "View dominance tree of function "
- "(with no function bodies)");
+INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+ "View dominance tree of function (with no function bodies)",
+ false, false);
char PostDomViewer::ID = 0;
-RegisterPass<PostDomViewer> C("view-postdom",
- "View postdominance tree of function");
+INITIALIZE_PASS(PostDomViewer, "view-postdom",
+ "View postdominance tree of function", false, false);
char PostDomOnlyViewer::ID = 0;
-RegisterPass<PostDomOnlyViewer> D("view-postdom-only",
- "View postdominance tree of function "
- "(with no function bodies)");
+INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+ "View postdominance tree of function "
+ "(with no function bodies)",
+ false, false);
namespace {
struct DomPrinter
: public DOTGraphTraitsPrinter<DominatorTree, false> {
static char ID;
- DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", &ID) {}
+ DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {}
};
struct DomOnlyPrinter
: public DOTGraphTraitsPrinter<DominatorTree, true> {
static char ID;
- DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", &ID) {}
+ DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {}
};
struct PostDomPrinter
: public DOTGraphTraitsPrinter<PostDominatorTree, false> {
static char ID;
PostDomPrinter() :
- DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", &ID) {}
+ DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {}
};
struct PostDomOnlyPrinter
: public DOTGraphTraitsPrinter<PostDominatorTree, true> {
static char ID;
PostDomOnlyPrinter() :
- DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", &ID) {}
+ DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {}
};
} // end anonymous namespace
char DomPrinter::ID = 0;
-RegisterPass<DomPrinter> E("dot-dom",
- "Print dominance tree of function "
- "to 'dot' file");
+INITIALIZE_PASS(DomPrinter, "dot-dom",
+ "Print dominance tree of function to 'dot' file",
+ false, false);
char DomOnlyPrinter::ID = 0;
-RegisterPass<DomOnlyPrinter> F("dot-dom-only",
- "Print dominance tree of function "
- "to 'dot' file "
- "(with no function bodies)");
+INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+ "Print dominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false);
char PostDomPrinter::ID = 0;
-RegisterPass<PostDomPrinter> G("dot-postdom",
- "Print postdominance tree of function "
- "to 'dot' file");
+INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
+ "Print postdominance tree of function to 'dot' file",
+ false, false);
char PostDomOnlyPrinter::ID = 0;
-RegisterPass<PostDomOnlyPrinter> H("dot-postdom-only",
- "Print postdominance tree of function "
- "to 'dot' file "
- "(with no function bodies)");
+INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+ "Print postdominance tree of function to 'dot' file "
+ "(with no function bodies)",
+ false, false);
// Create methods available outside of this file, to use them
// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 65c7c6efd8027..b3635283fda58 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -42,7 +42,7 @@ class BasicCallGraph : public ModulePass, public CallGraph {
public:
static char ID; // Class identification, replacement for typeinfo
- BasicCallGraph() : ModulePass(&ID), Root(0),
+ BasicCallGraph() : ModulePass(ID), Root(0),
ExternalCallingNode(0), CallsExternalNode(0) {}
// runOnModule - Compute the call graph for the specified module.
@@ -86,8 +86,8 @@ public:
/// an analysis interface through multiple inheritance. If needed, it should
/// override this to adjust the this pointer as needed for the specified pass
/// info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&CallGraph::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &CallGraph::ID)
return (CallGraph*)this;
return this;
}
@@ -145,8 +145,8 @@ private:
for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
- CallSite CS = CallSite::get(II);
- if (CS.getInstruction() && !isa<DbgInfoIntrinsic>(II)) {
+ CallSite CS(cast<Value>(II));
+ if (CS && !isa<DbgInfoIntrinsic>(II)) {
const Function *Callee = CS.getCalledFunction();
if (Callee)
Node->addCalledFunction(CS, getOrInsertFunction(Callee));
@@ -172,9 +172,8 @@ private:
} //End anonymous namespace
static RegisterAnalysisGroup<CallGraph> X("Call Graph");
-static RegisterPass<BasicCallGraph>
-Y("basiccg", "Basic CallGraph Construction", false, true);
-static RegisterAnalysisGroup<CallGraph, true> Z(Y);
+INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
+ "Basic CallGraph Construction", false, true, true);
char CallGraph::ID = 0;
char BasicCallGraph::ID = 0;
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 0c01ee5b82848..b7a27cb288d92 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -45,7 +45,7 @@ class CGPassManager : public ModulePass, public PMDataManager {
public:
static char ID;
explicit CGPassManager(int Depth)
- : ModulePass(&ID), PMDataManager(Depth) { }
+ : ModulePass(ID), PMDataManager(Depth) { }
/// run - Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the module, and if so, return true.
@@ -209,7 +209,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
// If the call edge is not from a call or invoke, then the function
// pass RAUW'd a call with another value. This can happen when
       // constant folding of well-known functions happens, etc.
- CallSite::get(I->first).getInstruction() == 0) {
+ !CallSite(I->first)) {
assert(!CheckingMode &&
"CallGraphSCCPass did not update the CallGraph correctly!");
@@ -245,8 +245,8 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- CallSite CS = CallSite::get(I);
- if (!CS.getInstruction() || isa<DbgInfoIntrinsic>(I)) continue;
+ CallSite CS(cast<Value>(I));
+ if (!CS || isa<DbgInfoIntrinsic>(I)) continue;
// If this call site already existed in the callgraph, just verify it
// matches up to expectations and remove it from CallSites.
@@ -582,9 +582,9 @@ namespace {
public:
static char ID;
- PrintCallGraphPass() : CallGraphSCCPass(&ID), Out(dbgs()) {}
+ PrintCallGraphPass() : CallGraphSCCPass(ID), Out(dbgs()) {}
PrintCallGraphPass(const std::string &B, raw_ostream &o)
- : CallGraphSCCPass(&ID), Banner(B), Out(o) {}
+ : CallGraphSCCPass(ID), Banner(B), Out(o) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index c4fb0b9a4e3dd..8eed9d6f68bc5 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -23,8 +23,8 @@
using namespace llvm;
char FindUsedTypes::ID = 0;
-static RegisterPass<FindUsedTypes>
-X("print-used-types", "Find Used Types", false, true);
+INITIALIZE_PASS(FindUsedTypes, "print-used-types",
+ "Find Used Types", false, true);
// IncorporateType - Incorporate one type and all of its subtypes into the
// collection of used types.
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index f13deea41d4ed..6759b0afdce39 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -47,14 +47,15 @@ namespace {
/// GlobalInfo - Maintain mod/ref info for all of the globals without
/// addresses taken that are read or written (transitively) by this
/// function.
- std::map<GlobalValue*, unsigned> GlobalInfo;
+ std::map<const GlobalValue*, unsigned> GlobalInfo;
/// MayReadAnyGlobal - May read global variables, but it is not known which.
bool MayReadAnyGlobal;
- unsigned getInfoForGlobal(GlobalValue *GV) const {
+ unsigned getInfoForGlobal(const GlobalValue *GV) const {
unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
- std::map<GlobalValue*, unsigned>::const_iterator I = GlobalInfo.find(GV);
+ std::map<const GlobalValue*, unsigned>::const_iterator I =
+ GlobalInfo.find(GV);
if (I != GlobalInfo.end())
Effect |= I->second;
return Effect;
@@ -71,23 +72,23 @@ namespace {
class GlobalsModRef : public ModulePass, public AliasAnalysis {
/// NonAddressTakenGlobals - The globals that do not have their addresses
/// taken.
- std::set<GlobalValue*> NonAddressTakenGlobals;
+ std::set<const GlobalValue*> NonAddressTakenGlobals;
/// IndirectGlobals - The memory pointed to by this global is known to be
/// 'owned' by the global.
- std::set<GlobalValue*> IndirectGlobals;
+ std::set<const GlobalValue*> IndirectGlobals;
/// AllocsForIndirectGlobals - If an instruction allocates memory for an
/// indirect global, this map indicates which one.
- std::map<Value*, GlobalValue*> AllocsForIndirectGlobals;
+ std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals;
/// FunctionInfo - For each function, keep track of what globals are
/// modified or read.
- std::map<Function*, FunctionRecord> FunctionInfo;
+ std::map<const Function*, FunctionRecord> FunctionInfo;
public:
static char ID;
- GlobalsModRef() : ModulePass(&ID) {}
+ GlobalsModRef() : ModulePass(ID) {}
bool runOnModule(Module &M) {
InitializeAliasAnalysis(this); // set up super class
@@ -107,39 +108,39 @@ namespace {
//
AliasResult alias(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size);
- ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
- ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) {
- return AliasAnalysis::getModRefInfo(CS1,CS2);
+ ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size);
+ ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
}
    /// getModRefBehavior - Return the mod/ref behavior of the specified
    /// function.
- ModRefBehavior getModRefBehavior(Function *F,
- std::vector<PointerAccessInfo> *Info) {
+ ModRefBehavior getModRefBehavior(const Function *F) {
if (FunctionRecord *FR = getFunctionInfo(F)) {
if (FR->FunctionEffect == 0)
return DoesNotAccessMemory;
else if ((FR->FunctionEffect & Mod) == 0)
return OnlyReadsMemory;
}
- return AliasAnalysis::getModRefBehavior(F, Info);
+ return AliasAnalysis::getModRefBehavior(F);
}
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
/// case the most generic behavior of this function should be returned.
- ModRefBehavior getModRefBehavior(CallSite CS,
- std::vector<PointerAccessInfo> *Info) {
- Function* F = CS.getCalledFunction();
- if (!F) return AliasAnalysis::getModRefBehavior(CS, Info);
+ ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+ const Function* F = CS.getCalledFunction();
+ if (!F) return AliasAnalysis::getModRefBehavior(CS);
if (FunctionRecord *FR = getFunctionInfo(F)) {
if (FR->FunctionEffect == 0)
return DoesNotAccessMemory;
else if ((FR->FunctionEffect & Mod) == 0)
return OnlyReadsMemory;
}
- return AliasAnalysis::getModRefBehavior(CS, Info);
+ return AliasAnalysis::getModRefBehavior(CS);
}
virtual void deleteValue(Value *V);
@@ -149,8 +150,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -158,8 +159,9 @@ namespace {
private:
/// getFunctionInfo - Return the function info for the function, or null if
/// we don't have anything useful to say about it.
- FunctionRecord *getFunctionInfo(Function *F) {
- std::map<Function*, FunctionRecord>::iterator I = FunctionInfo.find(F);
+ FunctionRecord *getFunctionInfo(const Function *F) {
+ std::map<const Function*, FunctionRecord>::iterator I =
+ FunctionInfo.find(F);
if (I != FunctionInfo.end())
return &I->second;
return 0;
@@ -175,9 +177,9 @@ namespace {
}
char GlobalsModRef::ID = 0;
-static RegisterPass<GlobalsModRef>
-X("globalsmodref-aa", "Simple mod/ref analysis for globals", false, true);
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(GlobalsModRef, AliasAnalysis,
+ "globalsmodref-aa", "Simple mod/ref analysis for globals",
+ false, true, false);
Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); }
@@ -409,7 +411,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
FunctionEffect |= CalleeFR->FunctionEffect;
// Incorporate callee's effects on globals into our info.
- for (std::map<GlobalValue*, unsigned>::iterator GI =
+ for (std::map<const GlobalValue*, unsigned>::iterator GI =
CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end();
GI != E; ++GI)
FR.GlobalInfo[GI->first] |= GI->second;
@@ -477,13 +479,13 @@ AliasAnalysis::AliasResult
GlobalsModRef::alias(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size) {
// Get the base object these pointers point to.
- Value *UV1 = const_cast<Value*>(V1->getUnderlyingObject());
- Value *UV2 = const_cast<Value*>(V2->getUnderlyingObject());
+ const Value *UV1 = V1->getUnderlyingObject();
+ const Value *UV2 = V2->getUnderlyingObject();
// If either of the underlying values is a global, they may be non-addr-taken
// globals, which we can answer queries about.
- GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
- GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
+ const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1);
+ const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2);
if (GV1 || GV2) {
// If the global's address is taken, pretend we don't know it's a pointer to
// the global.
@@ -503,12 +505,12 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size,
// so, we may be able to handle this. First check to see if the base pointer
// is a direct load from an indirect global.
GV1 = GV2 = 0;
- if (LoadInst *LI = dyn_cast<LoadInst>(UV1))
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
if (IndirectGlobals.count(GV))
GV1 = GV;
- if (LoadInst *LI = dyn_cast<LoadInst>(UV2))
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+ if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
if (IndirectGlobals.count(GV))
GV2 = GV;
@@ -530,16 +532,17 @@ GlobalsModRef::alias(const Value *V1, unsigned V1Size,
}
AliasAnalysis::ModRefResult
-GlobalsModRef::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
unsigned Known = ModRef;
// If we are asking for mod/ref info of a direct call with a pointer to a
// global we are tracking, return information if we have it.
- if (GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject()))
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(P->getUnderlyingObject()))
if (GV->hasLocalLinkage())
- if (Function *F = CS.getCalledFunction())
+ if (const Function *F = CS.getCalledFunction())
if (NonAddressTakenGlobals.count(GV))
- if (FunctionRecord *FR = getFunctionInfo(F))
+ if (const FunctionRecord *FR = getFunctionInfo(F))
Known = FR->getInfoForGlobal(GV);
if (Known == NoModRef)
@@ -558,7 +561,7 @@ void GlobalsModRef::deleteValue(Value *V) {
// any AllocRelatedValues for it.
if (IndirectGlobals.erase(GV)) {
// Remove any entries in AllocsForIndirectGlobals for this global.
- for (std::map<Value*, GlobalValue*>::iterator
+ for (std::map<const Value*, const GlobalValue*>::iterator
I = AllocsForIndirectGlobals.begin(),
E = AllocsForIndirectGlobals.end(); I != E; ) {
if (I->second == GV) {
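
With the containers and accessors const-corrected and the queries taking ImmutableCallSite, GlobalsModRef can now be consulted without mutable access to the IR. A sketch of a caller, assuming the interface declared above (the helper is illustrative):

    // Can this call modify the memory of a non-address-taken global?
    // ~0u is the "unknown size" convention of this interface.
    static bool mayWriteGlobal(AliasAnalysis &AA, ImmutableCallSite CS,
                               const GlobalValue *GV) {
      return (AA.getModRefInfo(CS, GV, ~0u) & AliasAnalysis::Mod) != 0;
    }
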
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 2c997dae5859c..cdf667ad6eed9 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,7 +21,6 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Assembly/AsmAnnotationWriter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -29,8 +28,7 @@
using namespace llvm;
char IVUsers::ID = 0;
-static RegisterPass<IVUsers>
-X("iv-users", "Induction Variable Users", false, true);
+INITIALIZE_PASS(IVUsers, "iv-users", "Induction Variable Users", false, true);
Pass *llvm::createIVUsersPass() {
return new IVUsers();
@@ -39,27 +37,31 @@ Pass *llvm::createIVUsersPass() {
/// isInteresting - Test whether the given expression is "interesting" when
/// used by the given instruction, within the context of analyzing the
/// given loop.
-static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L) {
- // Anything loop-invariant is interesting.
- if (!isa<SCEVUnknown>(S) && S->isLoopInvariant(L))
- return true;
-
+static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
+ ScalarEvolution *SE) {
// An addrec is interesting if it's affine or if it has an interesting start.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Keep things simple. Don't touch loop-variant strides.
if (AR->getLoop() == L)
return AR->isAffine() || !L->contains(I);
- // Otherwise recurse to see if the start value is interesting.
- return isInteresting(AR->getStart(), I, L);
+    // Otherwise recurse to check that the start value is interesting and
+    // that the step value is not, since we don't yet know how to do
+    // effective SCEV expansions for addrecs with interesting steps.
+ return isInteresting(AR->getStart(), I, L, SE) &&
+ !isInteresting(AR->getStepRecurrence(*SE), I, L, SE);
}
- // An add is interesting if any of its operands is.
+ // An add is interesting if exactly one of its operands is interesting.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+ bool AnyInterestingYet = false;
for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
OI != OE; ++OI)
- if (isInteresting(*OI, I, L))
- return true;
- return false;
+ if (isInteresting(*OI, I, L, SE)) {
+ if (AnyInterestingYet)
+ return false;
+ AnyInterestingYet = true;
+ }
+ return AnyInterestingYet;
}
// Nothing else is interesting here.
@@ -85,7 +87,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
// If we've come to an uninteresting expression, stop the traversal and
// call this a user.
- if (!isInteresting(ISE, I, L))
+ if (!isInteresting(ISE, I, L, SE))
return false;
SmallPtrSet<Instruction *, 4> UniqueUsers;
@@ -141,7 +143,7 @@ IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
}
IVUsers::IVUsers()
- : LoopPass(&ID) {
+ : LoopPass(ID) {
}
void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -176,9 +178,6 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
}
OS << ":\n";
- // Use a default AssemblyAnnotationWriter to suppress the default info
- // comments, which aren't relevant here.
- AssemblyAnnotationWriter Annotator;
for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
E = IVUses.end(); UI != E; ++UI) {
OS << " ";
@@ -192,7 +191,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
OS << ")";
}
OS << " in ";
- UI->getUser()->print(OS, &Annotator);
+ UI->getUser()->print(OS);
OS << '\n';
}
}
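
The new SCEVAddExpr rule is "exactly one interesting operand": if a second operand is also interesting, the whole add is rejected, presumably to keep the subsequent SCEV expansion simple. The rule in isolation, as plain C++ over precomputed per-operand flags (illustrative only):

    #include <vector>

    static bool addIsInteresting(const std::vector<bool> &OpInteresting) {
      bool AnyInterestingYet = false;
      for (size_t i = 0, e = OpInteresting.size(); i != e; ++i)
        if (OpInteresting[i]) {
          if (AnyInterestingYet)
            return false;        // a second interesting operand: give up
          AnyInterestingYet = true;
        }
      return AnyInterestingYet;  // exactly one interesting operand
    }
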
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index b1df517c2a94f..3e550f35c2553 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -152,14 +152,14 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
if (isa<DbgInfoIntrinsic>(II))
continue; // Debug intrinsics don't count as size.
-
- CallSite CS = CallSite::get(const_cast<Instruction*>(&*II));
-
+
+ ImmutableCallSite CS(cast<Instruction>(II));
+
// If this function contains a call to setjmp or _setjmp, never inline
// it. This is a hack because we depend on the user marking their local
// variables as volatile if they are live across a setjmp call, and they
// probably won't do this in callers.
- if (Function *F = CS.getCalledFunction()) {
+ if (const Function *F = CS.getCalledFunction()) {
if (F->isDeclaration() &&
(F->getName() == "setjmp" || F->getName() == "_setjmp"))
callsSetJmp = true;
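
The setjmp screen above now runs over const IR via ImmutableCallSite. Pulled out of the surrounding loop, the check amounts to this (a sketch; the helper name is hypothetical):

    // Is this instruction a direct call to an external setjmp/_setjmp?
    static bool isSetJmpCall(const Instruction *I) {
      ImmutableCallSite CS(I);
      if (!CS) return false;                       // not a call or invoke
      const Function *F = CS.getCalledFunction();  // null for indirect calls
      return F && F->isDeclaration() &&
             (F->getName() == "setjmp" || F->getName() == "_setjmp");
    }
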
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index bb2cf53c85efd..dcbcac005a2fc 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -51,7 +51,7 @@ namespace {
}
public:
static char ID; // Pass identification, replacement for typeid
- InstCount() : FunctionPass(&ID) {}
+ InstCount() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -64,8 +64,8 @@ namespace {
}
char InstCount::ID = 0;
-static RegisterPass<InstCount>
-X("instcount", "Counts the various types of Instructions", false, true);
+INITIALIZE_PASS(InstCount, "instcount",
+ "Counts the various types of Instructions", false, true);
FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index 1f17b77a5b96f..1c9e14884316b 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -16,8 +16,8 @@
using namespace llvm;
char IntervalPartition::ID = 0;
-static RegisterPass<IntervalPartition>
-X("intervals", "Interval Partition Construction", true, true);
+INITIALIZE_PASS(IntervalPartition, "intervals",
+ "Interval Partition Construction", true, true);
//===----------------------------------------------------------------------===//
// IntervalPartition Implementation
@@ -91,7 +91,7 @@ bool IntervalPartition::runOnFunction(Function &F) {
// distinguish it from a copy constructor. Always pass in false for now.
//
IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
- : FunctionPass(&ID) {
+ : FunctionPass(ID) {
assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
  // Pass false to intervals_begin because we take ownership of its memory
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index ff9026bede97e..e32dbc4447135 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -19,16 +19,18 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
char LazyValueInfo::ID = 0;
-static RegisterPass<LazyValueInfo>
-X("lazy-value-info", "Lazy Value Information Analysis", false, true);
+INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+ "Lazy Value Information Analysis", false, true);
namespace llvm {
FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
@@ -50,12 +52,15 @@ class LVILatticeVal {
enum LatticeValueTy {
/// undefined - This LLVM Value has no known value yet.
undefined,
+
/// constant - This LLVM Value has a specific constant value.
constant,
-
/// notconstant - This LLVM value is known to not have the specified value.
notconstant,
+    /// constantrange - The Value falls within this range.
+ constantrange,
+
/// overdefined - This instruction is not known to be constant, and we know
/// it has a value.
overdefined
@@ -63,42 +68,62 @@ class LVILatticeVal {
/// Val: This stores the current lattice value along with the Constant* for
/// the constant if this is a 'constant' or 'notconstant' value.
- PointerIntPair<Constant *, 2, LatticeValueTy> Val;
+ LatticeValueTy Tag;
+ Constant *Val;
+ ConstantRange Range;
public:
- LVILatticeVal() : Val(0, undefined) {}
+ LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {}
static LVILatticeVal get(Constant *C) {
LVILatticeVal Res;
- Res.markConstant(C);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ Res.markConstantRange(ConstantRange(CI->getValue(), CI->getValue()+1));
+ else if (!isa<UndefValue>(C))
+ Res.markConstant(C);
return Res;
}
static LVILatticeVal getNot(Constant *C) {
LVILatticeVal Res;
- Res.markNotConstant(C);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
+ Res.markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ else
+ Res.markNotConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getRange(ConstantRange CR) {
+ LVILatticeVal Res;
+ Res.markConstantRange(CR);
return Res;
}
- bool isUndefined() const { return Val.getInt() == undefined; }
- bool isConstant() const { return Val.getInt() == constant; }
- bool isNotConstant() const { return Val.getInt() == notconstant; }
- bool isOverdefined() const { return Val.getInt() == overdefined; }
+ bool isUndefined() const { return Tag == undefined; }
+ bool isConstant() const { return Tag == constant; }
+ bool isNotConstant() const { return Tag == notconstant; }
+ bool isConstantRange() const { return Tag == constantrange; }
+ bool isOverdefined() const { return Tag == overdefined; }
Constant *getConstant() const {
assert(isConstant() && "Cannot get the constant of a non-constant!");
- return Val.getPointer();
+ return Val;
}
Constant *getNotConstant() const {
assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
- return Val.getPointer();
+ return Val;
+ }
+
+ ConstantRange getConstantRange() const {
+ assert(isConstantRange() &&
+ "Cannot get the constant-range of a non-constant-range!");
+ return Range;
}
/// markOverdefined - Return true if this is a change in status.
bool markOverdefined() {
if (isOverdefined())
return false;
- Val.setInt(overdefined);
+ Tag = overdefined;
return true;
}
@@ -110,9 +135,9 @@ public:
}
assert(isUndefined());
- Val.setInt(constant);
+ Tag = constant;
assert(V && "Marking constant with NULL");
- Val.setPointer(V);
+ Val = V;
return true;
}
@@ -128,9 +153,29 @@ public:
else
assert(isUndefined());
- Val.setInt(notconstant);
+ Tag = notconstant;
assert(V && "Marking constant with NULL");
- Val.setPointer(V);
+ Val = V;
+ return true;
+ }
+
+ /// markConstantRange - Return true if this is a change in status.
+ bool markConstantRange(const ConstantRange NewR) {
+ if (isConstantRange()) {
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+      bool changed = Range != NewR;
+ Range = NewR;
+ return changed;
+ }
+
+ assert(isUndefined());
+ if (NewR.isEmptySet())
+ return markOverdefined();
+
+ Tag = constantrange;
+ Range = NewR;
return true;
}
@@ -147,20 +192,39 @@ public:
isa<ConstantExpr>(RHS.getNotConstant()))
return markOverdefined();
return false;
- }
- if (isConstant()) {
+ } else if (isConstant()) {
if (getConstant() == RHS.getNotConstant() ||
isa<ConstantExpr>(RHS.getNotConstant()) ||
isa<ConstantExpr>(getConstant()))
return markOverdefined();
return markNotConstant(RHS.getNotConstant());
+ } else if (isConstantRange()) {
+ return markOverdefined();
}
assert(isUndefined() && "Unexpected lattice");
return markNotConstant(RHS.getNotConstant());
}
+ if (RHS.isConstantRange()) {
+ if (isConstantRange()) {
+ ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+ if (NewR.isFullSet())
+ return markOverdefined();
+ else
+ return markConstantRange(NewR);
+ } else if (!isUndefined()) {
+ return markOverdefined();
+ }
+
+ assert(isUndefined() && "Unexpected lattice");
+ return markConstantRange(RHS.getConstantRange());
+ }
+
// RHS must be a constant, we must be undef, constant, or notconstant.
+ assert(!isConstantRange() &&
+ "Constant and ConstantRange cannot be merged.");
+
if (isUndefined())
return markConstant(RHS.getConstant());
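
Worth noting how the merge behaves once both sides are ranges: mergeIn unions the two ranges and only falls back to overdefined when the union becomes the full set. For example, with 8-bit values (a sketch using ConstantRange directly):

    ConstantRange A(APInt(8, 1), APInt(8, 5));    // [1,5)
    ConstantRange B(APInt(8, 10), APInt(8, 20));  // [10,20)
    ConstantRange U = A.unionWith(B);             // smallest covering range
    assert(!U.isFullSet());  // merge stays 'constantrange', not overdefined
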
@@ -191,6 +255,9 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
if (Val.isNotConstant())
return OS << "notconstant<" << *Val.getNotConstant() << '>';
+ else if (Val.isConstantRange())
+ return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+ << Val.getConstantRange().getUpper() << '>';
return OS << "constant<" << *Val.getConstant() << '>';
}
}
@@ -206,17 +273,41 @@ namespace {
public:
/// BlockCacheEntryTy - This is a computed lattice value at the end of the
/// specified basic block for a Value* that depends on context.
- typedef std::pair<BasicBlock*, LVILatticeVal> BlockCacheEntryTy;
+ typedef std::pair<AssertingVH<BasicBlock>, LVILatticeVal> BlockCacheEntryTy;
/// ValueCacheEntryTy - This is all of the cached block information for
/// exactly one Value*. The entries are sorted by the BasicBlock* of the
/// entries, allowing us to do a lookup with a binary search.
- typedef std::vector<BlockCacheEntryTy> ValueCacheEntryTy;
+ typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
private:
+    /// LVIValueHandle - A callback value handle that updates the cache when
+ /// values are erased.
+ struct LVIValueHandle : public CallbackVH {
+ LazyValueInfoCache *Parent;
+
+ LVIValueHandle(Value *V, LazyValueInfoCache *P)
+ : CallbackVH(V), Parent(P) { }
+
+ void deleted();
+ void allUsesReplacedWith(Value* V) {
+ deleted();
+ }
+
+ LVIValueHandle &operator=(Value *V) {
+ return *this = LVIValueHandle(V, Parent);
+ }
+ };
+
/// ValueCache - This is all of the cached information for all values,
/// mapped from Value* to key information.
- DenseMap<Value*, ValueCacheEntryTy> ValueCache;
+ std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+ /// OverDefinedCache - This tracks, on a per-block basis, the set of
+ /// values that are over-defined at the end of that block. This is required
+ /// for cache updating.
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> > OverDefinedCache;
+
public:
/// getValueInBlock - This is the query interface to determine the lattice
@@ -226,29 +317,23 @@ namespace {
/// getValueOnEdge - This is the query interface to determine the lattice
/// value for the specified Value* that is true on the specified edge.
LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
- };
-} // end anonymous namespace
-
-namespace {
- struct BlockCacheEntryComparator {
- static int Compare(const void *LHSv, const void *RHSv) {
- const LazyValueInfoCache::BlockCacheEntryTy *LHS =
- static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(LHSv);
- const LazyValueInfoCache::BlockCacheEntryTy *RHS =
- static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(RHSv);
- if (LHS->first < RHS->first)
- return -1;
- if (LHS->first > RHS->first)
- return 1;
- return 0;
- }
- bool operator()(const LazyValueInfoCache::BlockCacheEntryTy &LHS,
- const LazyValueInfoCache::BlockCacheEntryTy &RHS) const {
- return LHS.first < RHS.first;
+ /// threadEdge - This is the update interface to inform the cache that an
+ /// edge from PredBB to OldSucc has been threaded to be from PredBB to
+ /// NewSucc.
+    void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+                    BasicBlock *NewSucc);
+
+ /// eraseBlock - This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// clear - Empty the cache.
+ void clear() {
+ ValueCache.clear();
+ OverDefinedCache.clear();
}
};
-}
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
// LVIQuery Impl
@@ -267,78 +352,87 @@ namespace {
/// This is the current value being queried for.
Value *Val;
+ /// This is a pointer to the owning cache, for recursive queries.
+ LazyValueInfoCache &Parent;
+
/// This is all of the cached information about this value.
ValueCacheEntryTy &Cache;
+ /// This tracks, for each block, what values are overdefined.
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &OverDefinedCache;
+
-    /// NewBlocks - This is a mapping of the new BasicBlocks which have been
-    /// added to cache but that are not in sorted order.
+    /// NewBlockInfo - The set of new BasicBlocks added to the cache by this
+    /// query; their overdefined markers are recorded when the query finishes.
- DenseMap<BasicBlock*, LVILatticeVal> NewBlockInfo;
+ DenseSet<BasicBlock*> NewBlockInfo;
+
public:
- LVIQuery(Value *V, ValueCacheEntryTy &VC) : Val(V), Cache(VC) {
+ LVIQuery(Value *V, LazyValueInfoCache &P,
+ ValueCacheEntryTy &VC,
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> > &ODC)
+ : Val(V), Parent(P), Cache(VC), OverDefinedCache(ODC) {
}
~LVIQuery() {
// When the query is done, insert the newly discovered facts into the
// cache in sorted order.
if (NewBlockInfo.empty()) return;
-
- // Grow the cache to exactly fit the new data.
- Cache.reserve(Cache.size() + NewBlockInfo.size());
- // If we only have one new entry, insert it instead of doing a full-on
- // sort.
- if (NewBlockInfo.size() == 1) {
- BlockCacheEntryTy Entry = *NewBlockInfo.begin();
- ValueCacheEntryTy::iterator I =
- std::lower_bound(Cache.begin(), Cache.end(), Entry,
- BlockCacheEntryComparator());
- assert((I == Cache.end() || I->first != Entry.first) &&
- "Entry already in map!");
-
- Cache.insert(I, Entry);
- return;
+ for (DenseSet<BasicBlock*>::iterator I = NewBlockInfo.begin(),
+ E = NewBlockInfo.end(); I != E; ++I) {
+ if (Cache[*I].isOverdefined())
+ OverDefinedCache.insert(std::make_pair(*I, Val));
}
-
- // TODO: If we only have two new elements, INSERT them both.
-
- Cache.insert(Cache.end(), NewBlockInfo.begin(), NewBlockInfo.end());
- array_pod_sort(Cache.begin(), Cache.end(),
- BlockCacheEntryComparator::Compare);
-
}
LVILatticeVal getBlockValue(BasicBlock *BB);
LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB);
private:
- LVILatticeVal &getCachedEntryForBlock(BasicBlock *BB);
+ LVILatticeVal getCachedEntryForBlock(BasicBlock *BB);
};
} // end anonymous namespace
-/// getCachedEntryForBlock - See if we already have a value for this block. If
-/// so, return it, otherwise create a new entry in the NewBlockInfo map to use.
-LVILatticeVal &LVIQuery::getCachedEntryForBlock(BasicBlock *BB) {
-
- // Do a binary search to see if we already have an entry for this block in
- // the cache set. If so, find it.
- if (!Cache.empty()) {
- ValueCacheEntryTy::iterator Entry =
- std::lower_bound(Cache.begin(), Cache.end(),
- BlockCacheEntryTy(BB, LVILatticeVal()),
- BlockCacheEntryComparator());
- if (Entry != Cache.end() && Entry->first == BB)
- return Entry->second;
+void LazyValueInfoCache::LVIValueHandle::deleted() {
+ for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+ I = Parent->OverDefinedCache.begin(),
+ E = Parent->OverDefinedCache.end();
+ I != E; ) {
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
+ ++I;
+ if (tmp->second == getValPtr())
+ Parent->OverDefinedCache.erase(tmp);
}
- // Otherwise, check to see if it's in NewBlockInfo or create a new entry if
- // not.
- return NewBlockInfo[BB];
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->ValueCache.erase(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+ I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ) {
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator tmp = I;
+ ++I;
+ if (tmp->first == BB)
+ OverDefinedCache.erase(tmp);
+ }
+
+ for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator
+ I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+ I->second.erase(BB);
+}
+
+/// getCachedEntryForBlock - See if we already have a value for this block. If
+/// so, return it, otherwise create a new entry in the Cache map to use.
+LVILatticeVal LVIQuery::getCachedEntryForBlock(BasicBlock *BB) {
+ NewBlockInfo.insert(BB);
+ return Cache[BB];
}
LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
// See if we already have a value for this block.
- LVILatticeVal &BBLV = getCachedEntryForBlock(BB);
+ LVILatticeVal BBLV = getCachedEntryForBlock(BB);
// If we've already computed this block's value, return it.
if (!BBLV.isUndefined()) {
@@ -350,13 +444,28 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
// lattice value to overdefined, so that cycles will terminate and be
// conservatively correct.
BBLV.markOverdefined();
+ Cache[BB] = BBLV;
- // If V is live into BB, see if our predecessors know anything about it.
Instruction *BBI = dyn_cast<Instruction>(Val);
if (BBI == 0 || BBI->getParent() != BB) {
LVILatticeVal Result; // Start Undefined.
- unsigned NumPreds = 0;
+ // If this is a pointer, and there's a load from that pointer in this BB,
+ // then we know that the pointer can't be NULL.
+ bool NotNull = false;
+ if (Val->getType()->isPointerTy()) {
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+         BI != BE; ++BI) {
+ LoadInst *L = dyn_cast<LoadInst>(BI);
+ if (L && L->getPointerAddressSpace() == 0 &&
+ L->getPointerOperand()->getUnderlyingObject() ==
+ Val->getUnderlyingObject()) {
+ NotNull = true;
+ break;
+ }
+ }
+ }
+
+ unsigned NumPreds = 0;
// Loop over all of our predecessors, merging what we know from them into
// result.
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
@@ -367,11 +476,19 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
if (Result.isOverdefined()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because of pred.\n");
+ // If we previously determined that this is a pointer that can't be null
+ // then return that rather than giving up entirely.
+ if (NotNull) {
+ const PointerType *PTy = cast<PointerType>(Val->getType());
+ Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+ }
+
return Result;
}
++NumPreds;
}
+
// If this is the entry block, we must be asking about an argument. The
// value is overdefined.
if (NumPreds == 0 && BB == &BB->getParent()->front()) {
@@ -382,24 +499,123 @@ LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
// Return the merged value, which is more precise than 'overdefined'.
assert(!Result.isOverdefined());
- return getCachedEntryForBlock(BB) = Result;
+ return Cache[BB] = Result;
}
// If this value is defined by an instruction in this block, we have to
// process it here somehow or return overdefined.
if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- (void)PN;
- // TODO: PHI Translation in preds.
- } else {
+ LVILatticeVal Result; // Start Undefined.
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ Value* PhiVal = PN->getIncomingValueForBlock(*PI);
+ Result.mergeIn(Parent.getValueOnEdge(PhiVal, *PI, BB));
+
+ // If we hit overdefined, exit early. The BlockVals entry is already set
+ // to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ return Result;
+ }
+ }
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ return Cache[BB] = Result;
}
-
- DEBUG(dbgs() << " compute BB '" << BB->getName()
- << "' - overdefined because inst def found.\n");
+ assert(Cache[BB].isOverdefined() && "Recursive query changed our cache?");
+
+ // We can only analyze the definitions of certain classes of instructions
+ // (integral binops and casts at the moment), so bail if this isn't one.
LVILatticeVal Result;
- Result.markOverdefined();
- return getCachedEntryForBlock(BB) = Result;
+ if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
+ !BBI->getType()->isIntegerTy()) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Result.markOverdefined();
+ return Result;
+ }
+
+ // FIXME: We're currently limited to binops with a constant RHS. This should
+ // be improved.
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
+ if (BO && !isa<ConstantInt>(BO->getOperand(1))) {
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+
+ Result.markOverdefined();
+ return Result;
+ }
+
+ // Figure out the range of the LHS. If that fails, bail.
+ LVILatticeVal LHSVal = Parent.getValueInBlock(BBI->getOperand(0), BB);
+ if (!LHSVal.isConstantRange()) {
+ Result.markOverdefined();
+ return Result;
+ }
+
+ ConstantInt *RHS = 0;
+ ConstantRange LHSRange = LHSVal.getConstantRange();
+ ConstantRange RHSRange(1);
+ const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+ if (isa<BinaryOperator>(BBI)) {
+ RHS = dyn_cast<ConstantInt>(BBI->getOperand(1));
+ if (!RHS) {
+ Result.markOverdefined();
+ return Result;
+ }
+
+ RHSRange = ConstantRange(RHS->getValue(), RHS->getValue()+1);
+ }
+
+ // NOTE: We're currently limited by the set of operations that ConstantRange
+  // can evaluate symbolically. Enhancing that set will allow us to analyze
+ // more definitions.
+ switch (BBI->getOpcode()) {
+ case Instruction::Add:
+ Result.markConstantRange(LHSRange.add(RHSRange));
+ break;
+ case Instruction::Sub:
+ Result.markConstantRange(LHSRange.sub(RHSRange));
+ break;
+ case Instruction::Mul:
+ Result.markConstantRange(LHSRange.multiply(RHSRange));
+ break;
+ case Instruction::UDiv:
+ Result.markConstantRange(LHSRange.udiv(RHSRange));
+ break;
+ case Instruction::Shl:
+ Result.markConstantRange(LHSRange.shl(RHSRange));
+ break;
+ case Instruction::LShr:
+ Result.markConstantRange(LHSRange.lshr(RHSRange));
+ break;
+ case Instruction::Trunc:
+ Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
+ break;
+ case Instruction::SExt:
+ Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::ZExt:
+ Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
+ break;
+ case Instruction::BitCast:
+ Result.markConstantRange(LHSRange);
+ break;
+
+ // Unhandled instructions are overdefined.
+ default:
+ DEBUG(dbgs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+ Result.markOverdefined();
+ break;
+ }
+
+ return Cache[BB] = Result;
}
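
Each supported opcode above delegates to the corresponding ConstantRange transfer function, so precision is exactly what ConstantRange can model. A worked instance: if the LHS is known to lie in [0,10), an add of the constant 4 yields [4,14) rather than overdefined (sketch):

    // %y = add i32 %x, 4    with  %x known to be in [0,10)
    ConstantRange LHSRange(APInt(32, 0), APInt(32, 10));  // [0,10)
    ConstantRange RHSRange(APInt(32, 4), APInt(32, 5));   // the constant 4
    ConstantRange Res = LHSRange.add(RHSRange);           // [4,14)
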
@@ -420,28 +636,57 @@ LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
// it is.
if (BI->getCondition() == Val)
return LVILatticeVal::get(ConstantInt::get(
- Type::getInt1Ty(Val->getContext()), isTrueDest));
+ Type::getInt1Ty(Val->getContext()), isTrueDest));
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if (ICI->isEquality() && ICI->getOperand(0) == Val &&
- isa<Constant>(ICI->getOperand(1))) {
+ ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (ICI && ICI->getOperand(0) == Val &&
+ isa<Constant>(ICI->getOperand(1))) {
+ if (ICI->isEquality()) {
// We know that V has the RHS constant if this is a true SETEQ or
// false SETNE.
if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
}
+
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+ // Calculate the range of values that would satisfy the comparison.
+ ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
+ ConstantRange TrueValues =
+ ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+
+ // If we're interested in the false dest, invert the condition.
+ if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+ // Figure out the possible values of the query BEFORE this branch.
+ LVILatticeVal InBlock = getBlockValue(BBFrom);
+ if (!InBlock.isConstantRange())
+ return LVILatticeVal::getRange(TrueValues);
+
+ // Find all potential values that satisfy both the input and output
+ // conditions.
+ ConstantRange PossibleValues =
+ TrueValues.intersectWith(InBlock.getConstantRange());
+
+ return LVILatticeVal::getRange(PossibleValues);
+ }
+ }
}
}
// If the edge was formed by a switch on the value, then we may know exactly
// what it is.
if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
- // If BBTo is the default destination of the switch, we don't know anything.
- // Given a more powerful range analysis we could know stuff.
- if (SI->getCondition() == Val && SI->getDefaultDest() != BBTo) {
+ if (SI->getCondition() == Val) {
+ // We don't know anything in the default case.
+ if (SI->getDefaultDest() == BBTo) {
+ LVILatticeVal Result;
+ Result.markOverdefined();
+ return Result;
+ }
+
// We only know something if there is exactly one value that goes from
// BBFrom to BBTo.
unsigned NumEdges = 0;
@@ -474,7 +719,9 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
- LVILatticeVal Result = LVIQuery(V, ValueCache[V]).getBlockValue(BB);
+ LVILatticeVal Result = LVIQuery(V, *this,
+ ValueCache[LVIValueHandle(V, this)],
+ OverDefinedCache).getBlockValue(BB);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
@@ -488,24 +735,80 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
<< FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+
LVILatticeVal Result =
- LVIQuery(V, ValueCache[V]).getEdgeValue(FromBB, ToBB);
+ LVIQuery(V, *this, ValueCache[LVIValueHandle(V, this)],
+ OverDefinedCache).getEdgeValue(FromBB, ToBB);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
+void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+  // When an edge in the graph has been threaded, values that could not be
+  // determined before (i.e. were marked overdefined) may now be solvable.
+  // We do NOT try to proactively update these values. Instead,
+ // we clear their entries from the cache, and allow lazy updating to recompute
+ // them when needed.
+
+  // The updating process is fairly simple: we need to drop cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ DenseSet<Value*> ClearSet;
+ for (std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator
+ I = OverDefinedCache.begin(), E = OverDefinedCache.end(); I != E; ++I) {
+ if (I->first == OldSucc)
+ ClearSet.insert(I->second);
+ }
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ bool changed = false;
+ for (DenseSet<Value*>::iterator I = ClearSet.begin(),E = ClearSet.end();
+ I != E; ++I) {
+ // If a value was marked overdefined in OldSucc, and is here too...
+ std::set<std::pair<AssertingVH<BasicBlock>, Value*> >::iterator OI =
+ OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+ if (OI == OverDefinedCache.end()) continue;
+
+ // Remove it from the caches.
+ ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+ ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
+
+ assert(CI != Entry.end() && "Couldn't find entry to update?");
+ Entry.erase(CI);
+ OverDefinedCache.erase(OI);
+
+      // If we removed anything, then we potentially need to update the
+      // block's successors too.
+ changed = true;
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
//===----------------------------------------------------------------------===//
// LazyValueInfo Impl
//===----------------------------------------------------------------------===//
-bool LazyValueInfo::runOnFunction(Function &F) {
- TD = getAnalysisIfAvailable<TargetData>();
- // Fully lazy.
- return false;
-}
-
/// getCache - This lazily constructs the LazyValueInfoCache.
static LazyValueInfoCache &getCache(void *&PImpl) {
if (!PImpl)
@@ -513,6 +816,15 @@ static LazyValueInfoCache &getCache(void *&PImpl) {
return *static_cast<LazyValueInfoCache*>(PImpl);
}
+bool LazyValueInfo::runOnFunction(Function &F) {
+ if (PImpl)
+ getCache(PImpl).clear();
+
+ TD = getAnalysisIfAvailable<TargetData>();
+ // Fully lazy.
+ return false;
+}
+
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
@@ -526,6 +838,11 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
if (Result.isConstant())
return Result.getConstant();
+ else if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
return 0;
}
@@ -537,6 +854,11 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
if (Result.isConstant())
return Result.getConstant();
+ else if (Result.isConstantRange()) {
+ ConstantRange CR = Result.getConstantRange();
+ if (const APInt *SingleVal = CR.getSingleElement())
+ return ConstantInt::get(V->getContext(), *SingleVal);
+ }
return 0;
}
@@ -557,6 +879,36 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
return Unknown;
}
+ if (Result.isConstantRange()) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return Unknown;
+
+ ConstantRange CR = Result.getConstantRange();
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (!CR.contains(CI->getValue()))
+ return False;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return True;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (!CR.contains(CI->getValue()))
+ return True;
+
+ if (CR.isSingleElement() && CR.contains(CI->getValue()))
+ return False;
+ }
+
+ // Handle more complex predicates.
+ ConstantRange RHS(CI->getValue(), CI->getValue()+1);
+ ConstantRange TrueValues = ConstantRange::makeICmpRegion(Pred, RHS);
+ if (CR.intersectWith(TrueValues).isEmptySet())
+ return False;
+ else if (TrueValues.contains(CR))
+ return True;
+
+ return Unknown;
+ }
+
if (Result.isNotConstant()) {
// If this is an equality comparison, we can try to fold it knowing that
// "V != C1".
@@ -579,4 +931,11 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
return Unknown;
}
+void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock* NewSucc) {
+ if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
+}
+void LazyValueInfo::eraseBlock(BasicBlock *BB) {
+ if (PImpl) getCache(PImpl).eraseBlock(BB);
+}
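
The predicate logic at the end reduces to set algebra: build the region of values satisfying "V pred C" with makeICmpRegion, then test containment and intersection against what is known about V. With concrete numbers (a sketch):

    // Region satisfying (v u< 8): makeICmpRegion(ICMP_ULT, [8,9)) == [0,8).
    ConstantRange RHS(APInt(32, 8), APInt(32, 9));
    ConstantRange TrueValues =
        ConstantRange::makeICmpRegion(ICmpInst::ICMP_ULT, RHS);

    ConstantRange Known(APInt(32, 0), APInt(32, 4));  // V is known in [0,4)
    bool AlwaysTrue = TrueValues.contains(Known);                    // True
    bool NeverTrue  = Known.intersectWith(TrueValues).isEmptySet();  // False
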
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index 7419659298902..7f51202ecb558 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -20,11 +20,8 @@ using namespace llvm;
// Register this pass...
char LibCallAliasAnalysis::ID = 0;
-static RegisterPass<LibCallAliasAnalysis>
-X("libcall-aa", "LibCall Alias Analysis", false, true);
-
-// Declare that we implement the AliasAnalysis interface
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
+ "LibCall Alias Analysis", false, true, false);
FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
return new LibCallAliasAnalysis(LCI);
@@ -46,7 +43,7 @@ void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
/// vs the specified pointer/size.
AliasAnalysis::ModRefResult
LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
- CallSite CS, Value *P,
+ ImmutableCallSite CS, const Value *P,
unsigned Size) {
// If we have a function, check to see what kind of mod/ref effects it
// has. Start by including any info globally known about the function.
@@ -120,13 +117,14 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
// specified memory object.
//
AliasAnalysis::ModRefResult
-LibCallAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+ const Value *P, unsigned Size) {
ModRefResult MRInfo = ModRef;
// If this is a direct call to a function that LCI knows about, get the
// information about the runtime function.
if (LCI) {
- if (Function *F = CS.getCalledFunction()) {
+ if (const Function *F = CS.getCalledFunction()) {
if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, P, Size));
if (MRInfo == NoModRef) return NoModRef;
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index e0060c3e89b1a..81b0f46f3740e 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -40,7 +40,8 @@ const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
/// the specified function if we have it. If not, return null.
-const LibCallFunctionInfo *LibCallInfo::getFunctionInfo(Function *F) const {
+const LibCallFunctionInfo *
+LibCallInfo::getFunctionInfo(const Function *F) const {
StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
/// If this is the first time we are querying for this info, lazily construct
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 9f1b30d2cf45c..a9d972435f5fb 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -108,7 +108,7 @@ namespace {
raw_string_ostream MessagesStr;
static char ID; // Pass identification, replacement for typeid
- Lint() : FunctionPass(&ID), MessagesStr(Messages) {}
+ Lint() : FunctionPass(ID), MessagesStr(Messages) {}
virtual bool runOnFunction(Function &F);
@@ -167,8 +167,7 @@ namespace {
}
char Lint::ID = 0;
-static RegisterPass<Lint>
-X("lint", "Statically lint-checks LLVM IR", false, true);
+INITIALIZE_PASS(Lint, "lint", "Statically lint-checks LLVM IR", false, true);
// Assert - We know that cond should be true, if not print an error message.
#define Assert(C, M) \
@@ -247,8 +246,7 @@ void Lint::visitCallSite(CallSite CS) {
// where nothing is known.
if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
- Assert1(AI == BI ||
- AA->alias(*AI, ~0u, *BI, ~0u) != AliasAnalysis::MustAlias,
+ Assert1(AI == BI || AA->alias(*AI, *BI) != AliasAnalysis::MustAlias,
"Unusual: noalias argument aliases another argument", &I);
}
@@ -520,6 +518,9 @@ void Lint::visitVAArgInst(VAArgInst &I) {
void Lint::visitIndirectBrInst(IndirectBrInst &I) {
visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee);
+
+ Assert1(I.getNumDestinations() != 0,
+ "Undefined behavior: indirectbr with no destinations", &I);
}
void Lint::visitExtractElementInst(ExtractElementInst &I) {
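
The Lint change relies on a size-less alias() convenience overload that exists in the tree by this point; presumably it just forwards with unknown access sizes, roughly (an assumed shape, not the verbatim declaration):

    AliasAnalysis::AliasResult alias(const Value *V1, const Value *V2) {
      return alias(V1, ~0u, V2, ~0u);  // ~0u == unknown access size
    }
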
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
index 23964ffc457ed..0225f4fa25486 100644
--- a/lib/Analysis/LiveValues.cpp
+++ b/lib/Analysis/LiveValues.cpp
@@ -22,10 +22,10 @@ namespace llvm {
}
char LiveValues::ID = 0;
-static RegisterPass<LiveValues>
-X("live-values", "Value Liveness Analysis", false, true);
+INITIALIZE_PASS(LiveValues, "live-values",
+ "Value Liveness Analysis", false, true);
-LiveValues::LiveValues() : FunctionPass(&ID) {}
+LiveValues::LiveValues() : FunctionPass(ID) {}
void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
index e1019474cf431..82c02dcd13425 100644
--- a/lib/Analysis/LoopDependenceAnalysis.cpp
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -46,8 +46,8 @@ LoopPass *llvm::createLoopDependenceAnalysisPass() {
return new LoopDependenceAnalysis();
}
-static RegisterPass<LoopDependenceAnalysis>
-R("lda", "Loop Dependence Analysis", false, true);
+INITIALIZE_PASS(LoopDependenceAnalysis, "lda",
+ "Loop Dependence Analysis", false, true);
char LoopDependenceAnalysis::ID = 0;
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 818d0a9dd1146..46219d1b6f55c 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -38,8 +38,7 @@ VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
cl::desc("Verify loop info (time consuming)"));
char LoopInfo::ID = 0;
-static RegisterPass<LoopInfo>
-X("loops", "Natural Loop Information", true, true);
+INITIALIZE_PASS(LoopInfo, "loops", "Natural Loop Information", true, true);
//===----------------------------------------------------------------------===//
// Loop implementation
@@ -124,14 +123,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {
BasicBlock *H = getHeader();
BasicBlock *Incoming = 0, *Backedge = 0;
- typedef GraphTraits<Inverse<BasicBlock*> > InvBlockTraits;
- InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(H);
- assert(PI != InvBlockTraits::child_end(H) &&
+ pred_iterator PI = pred_begin(H);
+ assert(PI != pred_end(H) &&
"Loop must have at least one backedge!");
Backedge = *PI++;
- if (PI == InvBlockTraits::child_end(H)) return 0; // dead loop
+ if (PI == pred_end(H)) return 0; // dead loop
Incoming = *PI++;
- if (PI != InvBlockTraits::child_end(H)) return 0; // multiple backedges?
+ if (PI != pred_end(H)) return 0; // multiple backedges?
if (contains(Incoming)) {
if (contains(Backedge))
@@ -157,18 +155,6 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return 0;
}
-/// getCanonicalInductionVariableIncrement - Return the LLVM value that holds
-/// the canonical induction variable value for the "next" iteration of the
-/// loop. This always succeeds if getCanonicalInductionVariable succeeds.
-///
-Instruction *Loop::getCanonicalInductionVariableIncrement() const {
- if (PHINode *PN = getCanonicalInductionVariable()) {
- bool P1InLoop = contains(PN->getIncomingBlock(1));
- return cast<Instruction>(PN->getIncomingValue(P1InLoop));
- }
- return 0;
-}
-
/// getTripCount - Return a loop-invariant LLVM value indicating the number of
/// times the loop will be executed. Note that this means that the backedge
/// of the loop executes N-1 times. If the trip-count cannot be determined,
@@ -180,12 +166,12 @@ Instruction *Loop::getCanonicalInductionVariableIncrement() const {
Value *Loop::getTripCount() const {
// Canonical loops will end with a 'cmp ne I, V', where I is the incremented
// canonical induction variable and V is the trip count of the loop.
- Instruction *Inc = getCanonicalInductionVariableIncrement();
- if (Inc == 0) return 0;
- PHINode *IV = cast<PHINode>(Inc->getOperand(0));
+ PHINode *IV = getCanonicalInductionVariable();
+ if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
- BasicBlock *BackedgeBlock =
- IV->getIncomingBlock(contains(IV->getIncomingBlock(1)));
+ bool P0InLoop = contains(IV->getIncomingBlock(0));
+ Value *Inc = IV->getIncomingValue(!P0InLoop);
+ BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
if (BI->isConditional()) {
@@ -341,16 +327,12 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
BasicBlock *current = *BI;
switchExitBlocks.clear();
- typedef GraphTraits<BasicBlock *> BlockTraits;
- typedef GraphTraits<Inverse<BasicBlock *> > InvBlockTraits;
- for (BlockTraits::ChildIteratorType I =
- BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
- I != E; ++I) {
+ for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
      // If the block is inside the loop then it is not an exit block.
if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
continue;
- InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(*I);
+ pred_iterator PI = pred_begin(*I);
BasicBlock *firstPred = *PI;
// If current basic block is this exit block's first predecessor
@@ -363,8 +345,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
      // If a terminator has more than two successors, for example SwitchInst,
// then it is possible that there are multiple edges from current block
// to one exit block.
- if (std::distance(BlockTraits::child_begin(current),
- BlockTraits::child_end(current)) <= 2) {
+ if (std::distance(succ_begin(current), succ_end(current)) <= 2) {
ExitBlocks.push_back(*I);
continue;
}
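
getTripCount now reads the backedge value straight off the canonical PHI instead of going through the deleted increment helper. For a canonical counted loop the lookup runs as follows (a sketch mirroring the code above; names are illustrative):

    // Header PHI of a canonical loop:
    //   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
    PHINode *IV = L->getCanonicalInductionVariable();
    if (IV && IV->getNumIncomingValues() == 2) {
      bool P0InLoop = L->contains(IV->getIncomingBlock(0));
      Value *Inc = IV->getIncomingValue(!P0InLoop);         // %iv.next
      BasicBlock *Latch = IV->getIncomingBlock(!P0InLoop);  // backedge source
      // Latch's conditional branch is then matched against Inc to pull out
      // the trip-count operand of the 'icmp ne' exit condition.
      (void)Inc; (void)Latch;
    }
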
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 2727d2f9465c1..15d4db8f5f98e 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -30,9 +30,9 @@ private:
public:
static char ID;
- PrintLoopPass() : LoopPass(&ID), Out(dbgs()) {}
+ PrintLoopPass() : LoopPass(ID), Out(dbgs()) {}
PrintLoopPass(const std::string &B, raw_ostream &o)
- : LoopPass(&ID), Banner(B), Out(o) {}
+ : LoopPass(ID), Banner(B), Out(o) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -59,7 +59,7 @@ char PrintLoopPass::ID = 0;
char LPPassManager::ID = 0;
LPPassManager::LPPassManager(int Depth)
- : FunctionPass(&ID), PMDataManager(Depth) {
+ : FunctionPass(ID), PMDataManager(Depth) {
skipThisLoop = false;
redoThisLoop = false;
LI = NULL;
@@ -183,7 +183,7 @@ void LPPassManager::redoLoop(Loop *L) {
void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
BasicBlock *To, Loop *L) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *LP = (LoopPass *)getContainedPass(Index);
+ LoopPass *LP = getContainedPass(Index);
LP->cloneBasicBlockAnalysis(From, To, L);
}
}
@@ -198,7 +198,7 @@ void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
}
}
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *LP = (LoopPass *)getContainedPass(Index);
+ LoopPass *LP = getContainedPass(Index);
LP->deleteAnalysisValue(V, L);
}
}
@@ -240,7 +240,7 @@ bool LPPassManager::runOnFunction(Function &F) {
I != E; ++I) {
Loop *L = *I;
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *P = (LoopPass*)getContainedPass(Index);
+ LoopPass *P = getContainedPass(Index);
Changed |= P->doInitialization(L, *this);
}
}
@@ -254,7 +254,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *P = (LoopPass*)getContainedPass(Index);
+ LoopPass *P = getContainedPass(Index);
dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
CurrentLoop->getHeader()->getName());
@@ -320,7 +320,7 @@ bool LPPassManager::runOnFunction(Function &F) {
// Finalization
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
- LoopPass *P = (LoopPass *)getContainedPass(Index);
+ LoopPass *P = getContainedPass(Index);
Changed |= P->doFinalization();
}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 1f54d740db9de..d18d5ce0ea4cb 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -46,11 +46,11 @@ STATISTIC(NumCacheCompleteNonLocalPtr,
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
-static RegisterPass<MemoryDependenceAnalysis> X("memdep",
- "Memory Dependence Analysis", false, true);
+INITIALIZE_PASS(MemoryDependenceAnalysis, "memdep",
+ "Memory Dependence Analysis", false, true);
MemoryDependenceAnalysis::MemoryDependenceAnalysis()
-: FunctionPass(&ID), PredCache(0) {
+: FunctionPass(ID), PredCache(0) {
}
MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
}
@@ -120,33 +120,21 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
Pointer = CI->getArgOperand(0);
// calls to free() erase the entire structure
PointerSize = ~0ULL;
- } else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
+ } else if (CallSite InstCS = cast<Value>(Inst)) {
// Debug intrinsics don't cause dependences.
if (isa<DbgInfoIntrinsic>(Inst)) continue;
- CallSite InstCS = CallSite::get(Inst);
// If these two calls do not interfere, look past it.
switch (AA->getModRefInfo(CS, InstCS)) {
case AliasAnalysis::NoModRef:
- // If the two calls don't interact (e.g. InstCS is readnone) keep
- // scanning.
+ // If the two calls are the same, return InstCS as a Def, so that
+ // CS can be found redundant and eliminated.
+ if (isReadOnlyCall && InstCS.onlyReadsMemory() &&
+ CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+ return MemDepResult::getDef(Inst);
+
+ // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+ // keep scanning.
continue;
- case AliasAnalysis::Ref:
- // If the two calls read the same memory locations and CS is a readonly
- // function, then we have two cases: 1) the calls may not interfere with
- // each other at all. 2) the calls may produce the same value. In case
- // #1 we want to ignore the values, in case #2, we want to return Inst
- // as a Def dependence. This allows us to CSE in cases like:
- // X = strlen(P);
- // memchr(...);
- // Y = strlen(P); // Y = X
- if (isReadOnlyCall) {
- if (CS.getCalledFunction() != 0 &&
- CS.getCalledFunction() == InstCS.getCalledFunction())
- return MemDepResult::getDef(Inst);
- // Ignore unrelated read/read call dependences.
- continue;
- }
- // FALL THROUGH
default:
return MemDepResult::getClobber(Inst);
}
@@ -196,8 +184,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// FIXME: This only considers queries directly on the invariant-tagged
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point.
- AliasAnalysis::AliasResult R =
- AA->alias(II->getArgOperand(2), ~0U, MemPtr, ~0U);
+ AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(2), MemPtr);
if (R == AliasAnalysis::MustAlias) {
InvariantTag = II->getArgOperand(0);
continue;
@@ -209,8 +196,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// FIXME: This only considers queries directly on the invariant-tagged
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point.
- AliasAnalysis::AliasResult R =
- AA->alias(II->getArgOperand(1), ~0U, MemPtr, ~0U);
+ AliasAnalysis::AliasResult R = AA->alias(II->getArgOperand(1), MemPtr);
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(II);
}
@@ -387,7 +373,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
MemSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
break;
default:
- CallSite QueryCS = CallSite::get(QueryInst);
+ CallSite QueryCS(QueryInst);
bool isReadOnly = AA->onlyReadsMemory(QueryCS);
LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
QueryParent);
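
The new Def result for identical read-only calls is what enables call CSE in clients of MemDep. A hedged sketch of a GVN-style consumer, where MD is a MemoryDependenceAnalysis* and Call is the later of two identical readonly calls (e.g. the second strlen(P)); both names are hypothetical locals:

    MemDepResult Dep = MD->getDependency(Call);
    if (Dep.isDef())
      if (Instruction *Earlier = Dep.getInst()) {
        Call->replaceAllUsesWith(Earlier); // reuse the first call's result
        Call->eraseFromParent();
      }
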
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 556d4c8aab54a..2cc1c2aa005ca 100644
--- a/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -30,7 +30,7 @@ namespace {
DebugInfoFinder Finder;
public:
static char ID; // Pass identification, replacement for typeid
- ModuleDebugInfoPrinter() : ModulePass(&ID) {}
+ ModuleDebugInfoPrinter() : ModulePass(ID) {}
virtual bool runOnModule(Module &M);
@@ -42,9 +42,8 @@ namespace {
}
char ModuleDebugInfoPrinter::ID = 0;
-static RegisterPass<ModuleDebugInfoPrinter>
-X("module-debuginfo",
- "Decodes module-level debug info", false, true);
+INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo",
+ "Decodes module-level debug info", false, true);
ModulePass *llvm::createModuleDebugInfoPrinterPass() {
return new ModuleDebugInfoPrinter();
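
The INITIALIZE_PASS macro used throughout this patch is assumed here to be a thin wrapper over the static registration object it replaces; a hedged sketch of the expansion, not the real definition:

    #define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \
      static RegisterPass<passName> passName##_info(arg, name, cfg, analysis)
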
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
index 14df0b7198791..07f46824700a8 100644
--- a/lib/Analysis/PointerTracking.cpp
+++ b/lib/Analysis/PointerTracking.cpp
@@ -28,7 +28,7 @@
using namespace llvm;
char PointerTracking::ID = 0;
-PointerTracking::PointerTracking() : FunctionPass(&ID) {}
+PointerTracking::PointerTracking() : FunctionPass(ID) {}
bool PointerTracking::runOnFunction(Function &F) {
predCache.clear();
@@ -144,6 +144,55 @@ const SCEV *PointerTracking::computeAllocationCount(Value *P,
return SE->getCouldNotCompute();
}
+Value *PointerTracking::computeAllocationCountValue(Value *P, const Type *&Ty) const
+{
+ Value *V = P->stripPointerCasts();
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ Ty = AI->getAllocatedType();
+ // arraySize elements of type Ty.
+ return AI->getArraySize();
+ }
+
+ if (CallInst *CI = extractMallocCall(V)) {
+ Ty = getMallocAllocatedType(CI);
+ if (!Ty)
+ return 0;
+ Value *arraySize = getMallocArraySize(CI, TD);
+ if (!arraySize) {
+ Ty = Type::getInt8Ty(P->getContext());
+ return CI->getArgOperand(0);
+ }
+ // arraySize elements of type Ty.
+ return arraySize;
+ }
+
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
+ if (GV->hasDefinitiveInitializer()) {
+ Constant *C = GV->getInitializer();
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(C->getType())) {
+ Ty = ATy->getElementType();
+ return ConstantInt::get(Type::getInt32Ty(P->getContext()),
+ ATy->getNumElements());
+ }
+ }
+ Ty = cast<PointerType>(GV->getType())->getElementType();
+ return ConstantInt::get(Type::getInt32Ty(P->getContext()), 1);
+ //TODO: implement more tracking for globals
+ }
+
+ if (CallInst *CI = dyn_cast<CallInst>(V)) {
+ CallSite CS(CI);
+ Function *F = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ if (F == reallocFunc) {
+ Ty = Type::getInt8Ty(P->getContext());
+ // realloc allocates arg1 bytes.
+ return CS.getArgument(1);
+ }
+ }
+
+ return 0;
+}
+
// Calculates the number of elements of type Ty allocated for P.
const SCEV *PointerTracking::computeAllocationCountForType(Value *P,
const Type *Ty)
@@ -263,5 +312,5 @@ void PointerTracking::print(raw_ostream &OS, const Module* M) const {
}
}
-static RegisterPass<PointerTracking> X("pointertracking",
- "Track pointer bounds", false, true);
+INITIALIZE_PASS(PointerTracking, "pointertracking",
+ "Track pointer bounds", false, true);
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index 7354afa181b2a..cbe8d1867e4f1 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -28,8 +28,8 @@ using namespace llvm;
char PostDominatorTree::ID = 0;
char PostDominanceFrontier::ID = 0;
-static RegisterPass<PostDominatorTree>
-F("postdomtree", "Post-Dominator Tree Construction", true, true);
+INITIALIZE_PASS(PostDominatorTree, "postdomtree",
+ "Post-Dominator Tree Construction", true, true);
bool PostDominatorTree::runOnFunction(Function &F) {
DT->recalculate(F);
@@ -53,8 +53,8 @@ FunctionPass* llvm::createPostDomTree() {
// PostDominanceFrontier Implementation
//===----------------------------------------------------------------------===//
-static RegisterPass<PostDominanceFrontier>
-H("postdomfrontier", "Post-Dominance Frontier Construction", true, true);
+INITIALIZE_PASS(PostDominanceFrontier, "postdomfrontier",
+ "Post-Dominance Frontier Construction", true, true);
const DominanceFrontier::DomSetType &
PostDominanceFrontier::calculate(const PostDominatorTree &DT,
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index da4ce47692624..ecc0a1845307a 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -39,7 +39,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
explicit ProfileEstimatorPass(const double execcount = 0)
- : FunctionPass(&ID), ExecCount(execcount) {
+ : FunctionPass(ID), ExecCount(execcount) {
if (execcount == 0) ExecCount = LoopWeight;
}
@@ -59,8 +59,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&ProfileInfo::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
return (ProfileInfo*)this;
return this;
}
@@ -72,13 +72,11 @@ namespace {
} // End of anonymous namespace
char ProfileEstimatorPass::ID = 0;
-static RegisterPass<ProfileEstimatorPass>
-X("profile-estimator", "Estimate profiling information", false, true);
-
-static RegisterAnalysisGroup<ProfileInfo> Y(X);
+INITIALIZE_AG_PASS(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+ "Estimate profiling information", false, true, false);
namespace llvm {
- const PassInfo *ProfileEstimatorPassID = &X;
+ char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
FunctionPass *createProfileEstimatorPass() {
return new ProfileEstimatorPass();
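
INITIALIZE_AG_PASS is assumed to fold the two statics it replaces, pass registration plus analysis-group membership, into one macro; roughly (hedged sketch, with the trailing `def` flagging the group's default implementation):

    #define INITIALIZE_AG_PASS(passName, agName, arg, n, cfg, analysis, def) \
      static RegisterPass<passName> passName##_info(arg, n, cfg, analysis);  \
      static RegisterAnalysisGroup<agName, def> passName##_ag(passName##_info)
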
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 8d2712fd6e063..fc7f28662c017 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -1076,14 +1076,14 @@ raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, con
namespace {
struct NoProfileInfo : public ImmutablePass, public ProfileInfo {
static char ID; // Class identification, replacement for typeinfo
- NoProfileInfo() : ImmutablePass(&ID) {}
+ NoProfileInfo() : ImmutablePass(ID) {}
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&ProfileInfo::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
return (ProfileInfo*)this;
return this;
}
@@ -1096,10 +1096,7 @@ namespace {
char NoProfileInfo::ID = 0;
// Register this pass...
-static RegisterPass<NoProfileInfo>
-X("no-profile", "No Profile Information", false, true);
-
-// Declare that we implement the ProfileInfo interface
-static RegisterAnalysisGroup<ProfileInfo, true> Y(X);
+INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
+ "No Profile Information", false, true, true);
ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 8ea4ecf54f98f..d325b574e8482 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -45,7 +45,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
explicit LoaderPass(const std::string &filename = "")
- : ModulePass(&ID), Filename(filename) {
+ : ModulePass(ID), Filename(filename) {
if (filename.empty()) Filename = ProfileInfoFilename;
}
@@ -67,8 +67,8 @@ namespace {
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&ProfileInfo::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &ProfileInfo::ID)
return (ProfileInfo*)this;
return this;
}
@@ -79,12 +79,10 @@ namespace {
} // End of anonymous namespace
char LoaderPass::ID = 0;
-static RegisterPass<LoaderPass>
-X("profile-loader", "Load profile information from llvmprof.out", false, true);
+INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
+ "Load profile information from llvmprof.out", false, true, false);
-static RegisterAnalysisGroup<ProfileInfo> Y(X);
-
-const PassInfo *llvm::ProfileLoaderPassID = &X;
+char &llvm::ProfileLoaderPassID = LoaderPass::ID;
ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
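
Exporting the ID as a char& rather than a PassInfo* lets clients schedule the pass without touching its registration machinery; a hypothetical client:

    void MyClientPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequiredID(ProfileLoaderPassID); // takes the ID char by reference
    }
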
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 5d87e14a97b4f..3f01b2d592bc4 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -59,10 +59,10 @@ namespace llvm {
public:
static char ID; // Class identification, replacement for typeinfo
- explicit ProfileVerifierPassT () : FunctionPass(&ID) {
+ explicit ProfileVerifierPassT () : FunctionPass(ID) {
DisableAssertions = ProfileVerifierDisableAssertions;
}
- explicit ProfileVerifierPassT (bool da) : FunctionPass(&ID),
+ explicit ProfileVerifierPassT (bool da) : FunctionPass(ID),
DisableAssertions(da) {
}
@@ -366,8 +366,8 @@ namespace llvm {
char ProfileVerifierPassT<FType, BType>::ID = 0;
}
-static RegisterPass<ProfileVerifierPass>
-X("profile-verifier", "Verify profiling information", false, true);
+INITIALIZE_PASS(ProfileVerifierPass, "profile-verifier",
+ "Verify profiling information", false, true);
namespace llvm {
FunctionPass *createProfileVerifierPass() {
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
new file mode 100644
index 0000000000000..abc057a773a9f
--- /dev/null
+++ b/lib/Analysis/RegionInfo.cpp
@@ -0,0 +1,749 @@
+//===- RegionInfo.cpp - SESE region detection analysis --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Detects single entry single exit regions in the control flow graph.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+#define DEBUG_TYPE "region"
+#include "llvm/Support/Debug.h"
+
+#include <set>
+#include <algorithm>
+
+using namespace llvm;
+
+// Always verify if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyRegionInfo = true;
+#else
+static bool VerifyRegionInfo = false;
+#endif
+
+static cl::opt<bool,true>
+VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo),
+ cl::desc("Verify region info (time consuming)"));
+
+STATISTIC(numRegions, "The # of regions");
+STATISTIC(numSimpleRegions, "The # of simple regions");
+
+//===----------------------------------------------------------------------===//
+/// PrintStyle - Print region in different ways.
+enum PrintStyle { PrintNone, PrintBB, PrintRN };
+
+cl::opt<enum PrintStyle> printStyle("print-region-style", cl::Hidden,
+ cl::desc("style of printing regions"),
+ cl::values(
+ clEnumValN(PrintNone, "none", "print no details"),
+ clEnumValN(PrintBB, "bb", "print regions in detail with block_iterator"),
+ clEnumValN(PrintRN, "rn", "print regions in detail with element_iterator"),
+ clEnumValEnd));
+//===----------------------------------------------------------------------===//
+/// Region Implementation
+Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo,
+ DominatorTree *dt, Region *Parent)
+ : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {}
+
+Region::~Region() {
+ // Free the cached nodes.
+ for (BBNodeMapT::iterator it = BBNodeMap.begin(),
+ ie = BBNodeMap.end(); it != ie; ++it)
+ delete it->second;
+
+ // Only clean the cache for this Region. Caches of child Regions will be
+ // cleaned when the child Regions are deleted.
+ BBNodeMap.clear();
+
+ for (iterator I = begin(), E = end(); I != E; ++I)
+ delete *I;
+}
+
+bool Region::contains(const BasicBlock *B) const {
+ BasicBlock *BB = const_cast<BasicBlock*>(B);
+
+ assert(DT->getNode(BB) && "BB not part of the dominance tree");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ // Toplevel region.
+ if (!exit)
+ return true;
+
+ return (DT->dominates(entry, BB)
+ && !(DT->dominates(exit, BB) && DT->dominates(entry, exit)));
+}
+
+bool Region::contains(const Loop *L) const {
+ // BBs that are not part of any loop are elements of the Loop
+ // described by the NULL pointer. This loop is not part of any region,
+ // except if the region describes the whole function.
+ if (L == 0)
+ return getExit() == 0;
+
+ if (!contains(L->getHeader()))
+ return false;
+
+ SmallVector<BasicBlock *, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ for (SmallVectorImpl<BasicBlock*>::iterator BI = ExitingBlocks.begin(),
+ BE = ExitingBlocks.end(); BI != BE; ++BI)
+ if (!contains(*BI))
+ return false;
+
+ return true;
+}
+
+Loop *Region::outermostLoopInRegion(Loop *L) const {
+ if (!contains(L))
+ return 0;
+
+ while (L && contains(L->getParentLoop())) {
+ L = L->getParentLoop();
+ }
+
+ return L;
+}
+
+Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
+ assert(LI && BB && "LI and BB cannot be null!");
+ Loop *L = LI->getLoopFor(BB);
+ return outermostLoopInRegion(L);
+}
+
+bool Region::isSimple() const {
+ bool isSimple = true;
+ bool found = false;
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ // TopLevelRegion
+ if (!exit)
+ return false;
+
+ for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
+ ++PI) {
+ BasicBlock *Pred = *PI;
+ if (DT->getNode(Pred) && !contains(Pred)) {
+ if (found) {
+ isSimple = false;
+ break;
+ }
+ found = true;
+ }
+ }
+
+ found = false;
+
+ for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
+ ++PI)
+ if (contains(*PI)) {
+ if (found) {
+ isSimple = false;
+ break;
+ }
+ found = true;
+ }
+
+ return isSimple;
+}
+
+std::string Region::getNameStr() const {
+ std::string exitName;
+ std::string entryName;
+
+ if (getEntry()->getName().empty()) {
+ raw_string_ostream OS(entryName);
+
+ WriteAsOperand(OS, getEntry(), false);
+ entryName = OS.str();
+ } else
+ entryName = getEntry()->getNameStr();
+
+ if (getExit()) {
+ if (getExit()->getName().empty()) {
+ raw_string_ostream OS(exitName);
+
+ WriteAsOperand(OS, getExit(), false);
+ exitName = OS.str();
+ } else
+ exitName = getExit()->getNameStr();
+ } else
+ exitName = "<Function Return>";
+
+ return entryName + " => " + exitName;
+}
+
+void Region::verifyBBInRegion(BasicBlock *BB) const {
+ if (!contains(BB))
+ llvm_unreachable("Broken region found!");
+
+ BasicBlock *entry = getEntry(), *exit = getExit();
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (!contains(*SI) && exit != *SI)
+ llvm_unreachable("Broken region found!");
+
+ if (entry != BB)
+ for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI)
+ if (!contains(*SI))
+ llvm_unreachable("Broken region found!");
+}
+
+void Region::verifyWalk(BasicBlock *BB, std::set<BasicBlock*> *visited) const {
+ BasicBlock *exit = getExit();
+
+ visited->insert(BB);
+
+ verifyBBInRegion(BB);
+
+ for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
+ if (*SI != exit && visited->find(*SI) == visited->end())
+ verifyWalk(*SI, visited);
+}
+
+void Region::verifyRegion() const {
+ // Only do verification when the user wants to; otherwise this expensive
+ // check will be invoked by the PassManager.
+ if (!VerifyRegionInfo) return;
+
+ std::set<BasicBlock*> visited;
+ verifyWalk(getEntry(), &visited);
+}
+
+void Region::verifyRegionNest() const {
+ for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->verifyRegionNest();
+
+ verifyRegion();
+}
+
+Region::block_iterator Region::block_begin() {
+ return GraphTraits<FlatIt<Region*> >::nodes_begin(this);
+}
+
+Region::block_iterator Region::block_end() {
+ return GraphTraits<FlatIt<Region*> >::nodes_end(this);
+}
+
+Region::const_block_iterator Region::block_begin() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_begin(this);
+}
+
+Region::const_block_iterator Region::block_end() const {
+ return GraphTraits<FlatIt<const Region*> >::nodes_end(this);
+}
+
+Region::element_iterator Region::element_begin() {
+ return GraphTraits<Region*>::nodes_begin(this);
+}
+
+Region::element_iterator Region::element_end() {
+ return GraphTraits<Region*>::nodes_end(this);
+}
+
+Region::const_element_iterator Region::element_begin() const {
+ return GraphTraits<const Region*>::nodes_begin(this);
+}
+
+Region::const_element_iterator Region::element_end() const {
+ return GraphTraits<const Region*>::nodes_end(this);
+}
+
+Region* Region::getSubRegionNode(BasicBlock *BB) const {
+ Region *R = RI->getRegionFor(BB);
+
+ if (!R || R == this)
+ return 0;
+
+ // If we pass the BB out of this region, that means our code is broken.
+ assert(contains(R) && "BB not in current region!");
+
+ while (contains(R->getParent()) && R->getParent() != this)
+ R = R->getParent();
+
+ if (R->getEntry() != BB)
+ return 0;
+
+ return R;
+}
+
+RegionNode* Region::getBBNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+
+ BBNodeMapT::const_iterator at = BBNodeMap.find(BB);
+
+ if (at != BBNodeMap.end())
+ return at->second;
+
+ RegionNode *NewNode = new RegionNode(const_cast<Region*>(this), BB);
+ BBNodeMap.insert(std::make_pair(BB, NewNode));
+ return NewNode;
+}
+
+RegionNode* Region::getNode(BasicBlock *BB) const {
+ assert(contains(BB) && "Can get BB node out of this region!");
+ if (Region* Child = getSubRegionNode(BB))
+ return Child->getNode();
+
+ return getBBNode(BB);
+}
+
+void Region::transferChildrenTo(Region *To) {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ (*I)->parent = To;
+ To->children.push_back(*I);
+ }
+ children.clear();
+}
+
+void Region::addSubRegion(Region *SubRegion) {
+ assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
+ SubRegion->parent = this;
+ // Set up the region node.
+ assert(std::find(children.begin(), children.end(), SubRegion) == children.end()
+ && "Node already exist!");
+ children.push_back(SubRegion);
+}
+
+
+Region *Region::removeSubRegion(Region *Child) {
+ assert(Child->parent == this && "Child is not a child of this region!");
+ Child->parent = 0;
+ RegionSet::iterator I = std::find(children.begin(), children.end(), Child);
+ assert(I != children.end() && "Region does not exit. Unable to remove.");
+ children.erase(children.begin()+(I-begin()));
+ return Child;
+}
+
+unsigned Region::getDepth() const {
+ unsigned Depth = 0;
+
+ for (Region *R = parent; R != 0; R = R->parent)
+ ++Depth;
+
+ return Depth;
+}
+
+void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const {
+ if (print_tree)
+ OS.indent(level*2) << "[" << level << "] " << getNameStr();
+ else
+ OS.indent(level*2) << getNameStr();
+
+ OS << "\n";
+
+
+ if (printStyle != PrintNone) {
+ OS.indent(level*2) << "{\n";
+ OS.indent(level*2 + 2);
+
+ if (printStyle == PrintBB) {
+ for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I)
+ OS << **I << ", "; // TODO: remove the last ","
+ } else if (printStyle == PrintRN) {
+ for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I)
+ OS << **I << ", "; // TODO: remove the last ",
+ }
+
+ OS << "\n";
+ }
+
+ if (print_tree)
+ for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->print(OS, print_tree, level+1);
+
+ if (printStyle != PrintNone)
+ OS.indent(level*2) << "} \n";
+}
+
+void Region::dump() const {
+ print(dbgs(), true, getDepth());
+}
+
+void Region::clearNodeCache() {
+ BBNodeMap.clear();
+ for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI)
+ (*RI)->clearNodeCache();
+}
+
+//===----------------------------------------------------------------------===//
+// RegionInfo implementation
+//
+
+bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry,
+ BasicBlock *exit) const {
+ for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(entry, P) && !DT->dominates(exit, P))
+ return false;
+ }
+ return true;
+}
+
+bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+ typedef DominanceFrontier::DomSetType DST;
+
+ DST *entrySuccs = &DF->find(entry)->second;
+
+ // Exit is the header of a loop that contains the entry. In this case,
+ // the dominance frontier must only contain the exit.
+ if (!DT->dominates(entry, exit)) {
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI)
+ if (*SI != exit && *SI != entry)
+ return false;
+
+ return true;
+ }
+
+ DST *exitSuccs = &DF->find(exit)->second;
+
+ // Do not allow edges leaving the region.
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
+ SI != SE; ++SI) {
+ if (*SI == exit || *SI == entry)
+ continue;
+ if (exitSuccs->find(*SI) == exitSuccs->end())
+ return false;
+ if (!isCommonDomFrontier(*SI, entry, exit))
+ return false;
+ }
+
+ // Do not allow edges pointing into the region.
+ for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end();
+ SI != SE; ++SI)
+ if (DT->properlyDominates(entry, *SI) && *SI != exit)
+ return false;
+
+
+ return true;
+}
+
+void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit,
+ BBtoBBMap *ShortCut) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ BBtoBBMap::iterator e = ShortCut->find(exit);
+
+ if (e == ShortCut->end())
+ // No further region at exit available.
+ (*ShortCut)[entry] = exit;
+ else {
+ // We found a region e that starts at exit. Therefore (entry, e->second)
+ // is also a region, one larger than (entry, exit). Insert the
+ // larger one.
+ BasicBlock *BB = e->second;
+ (*ShortCut)[entry] = BB;
+ }
+}
+
+DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N,
+ BBtoBBMap *ShortCut) const {
+ BBtoBBMap::iterator e = ShortCut->find(N->getBlock());
+
+ if (e == ShortCut->end())
+ return N->getIDom();
+
+ return PDT->getNode(e->second)->getIDom();
+}
+
+bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ unsigned num_successors = succ_end(entry) - succ_begin(entry);
+
+ if (num_successors <= 1 && exit == *(succ_begin(entry)))
+ return true;
+
+ return false;
+}
+
+void RegionInfo::updateStatistics(Region *R) {
+ ++numRegions;
+
+ // TODO: Slow. Should only be enabled if -stats is used.
+ if (R->isSimple()) ++numSimpleRegions;
+}
+
+Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) {
+ assert(entry && exit && "entry and exit must not be null!");
+
+ if (isTrivialRegion(entry, exit))
+ return 0;
+
+ Region *region = new Region(entry, exit, this, DT);
+ BBtoRegion.insert(std::make_pair(entry, region));
+
+ #ifdef XDEBUG
+ region->verifyRegion();
+ #else
+ DEBUG(region->verifyRegion());
+ #endif
+
+ updateStatistics(region);
+ return region;
+}
+
+void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) {
+ assert(entry);
+
+ DomTreeNode *N = PDT->getNode(entry);
+
+ if (!N)
+ return;
+
+ Region *lastRegion= 0;
+ BasicBlock *lastExit = entry;
+
+ // As only a BasicBlock that postdominates entry can finish a region, walk the
+ // post dominance tree upwards.
+ while ((N = getNextPostDom(N, ShortCut))) {
+ BasicBlock *exit = N->getBlock();
+
+ if (!exit)
+ break;
+
+ if (isRegion(entry, exit)) {
+ Region *newRegion = createRegion(entry, exit);
+
+ if (lastRegion)
+ newRegion->addSubRegion(lastRegion);
+
+ lastRegion = newRegion;
+ lastExit = exit;
+ }
+
+ // This can never be a region, so stop the search.
+ if (!DT->dominates(entry, exit))
+ break;
+ }
+
+ // Tried to create regions from entry to lastExit. Next time take a
+ // shortcut from entry to lastExit.
+ if (lastExit != entry)
+ insertShortCut(entry, lastExit, ShortCut);
+}
+
+void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) {
+ BasicBlock *entry = &(F.getEntryBlock());
+ DomTreeNode *N = DT->getNode(entry);
+
+ // Iterate over the dominance tree in post order to start with the small
+ // regions from the bottom of the dominance tree. If the small regions are
+ // detected first, detection of bigger regions is faster, as we can jump
+ // over the small regions.
+ for (po_iterator<DomTreeNode*> FI = po_begin(N), FE = po_end(N); FI != FE;
+ ++FI) {
+ findRegionsWithEntry(FI->getBlock(), ShortCut);
+ }
+}
+
+Region *RegionInfo::getTopMostParent(Region *region) {
+ while (region->parent)
+ region = region->getParent();
+
+ return region;
+}
+
+void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) {
+ BasicBlock *BB = N->getBlock();
+
+ // Passed region exit
+ while (BB == region->getExit())
+ region = region->getParent();
+
+ BBtoRegionMap::iterator it = BBtoRegion.find(BB);
+
+ // This basic block is a start block of a region. It is already in the
+ // BBtoRegion relation. Only the child basic blocks have to be updated.
+ if (it != BBtoRegion.end()) {
+ Region *newRegion = it->second;;
+ region->addSubRegion(getTopMostParent(newRegion));
+ region = newRegion;
+ } else {
+ BBtoRegion[BB] = region;
+ }
+
+ for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI)
+ buildRegionsTree(*CI, region);
+}
+
+void RegionInfo::releaseMemory() {
+ BBtoRegion.clear();
+ if (TopLevelRegion)
+ delete TopLevelRegion;
+ TopLevelRegion = 0;
+}
+
+RegionInfo::RegionInfo() : FunctionPass(ID) {
+ TopLevelRegion = 0;
+}
+
+RegionInfo::~RegionInfo() {
+ releaseMemory();
+}
+
+void RegionInfo::Calculate(Function &F) {
+ // ShortCut is a map that stores for every BB the exit of the largest
+ // region starting at BB. These regions can be treated as single BBs.
+ // This improves performance on linear CFGs.
+ BBtoBBMap ShortCut;
+
+ scanForRegions(F, &ShortCut);
+ BasicBlock *BB = &F.getEntryBlock();
+ buildRegionsTree(DT->getNode(BB), TopLevelRegion);
+}
+
+bool RegionInfo::runOnFunction(Function &F) {
+ releaseMemory();
+
+ DT = &getAnalysis<DominatorTree>();
+ PDT = &getAnalysis<PostDominatorTree>();
+ DF = &getAnalysis<DominanceFrontier>();
+
+ TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0);
+ updateStatistics(TopLevelRegion);
+
+ Calculate(F);
+
+ return false;
+}
+
+void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<PostDominatorTree>();
+ AU.addRequired<DominanceFrontier>();
+}
+
+void RegionInfo::print(raw_ostream &OS, const Module *) const {
+ OS << "Region tree:\n";
+ TopLevelRegion->print(OS, true, 0);
+ OS << "End region tree\n";
+}
+
+void RegionInfo::verifyAnalysis() const {
+ // Only do verification when the user wants to; otherwise this expensive
+ // check would be invoked by PMDataManager::verifyPreservedAnalysis whenever
+ // a region pass (marked PreservedAll) finishes.
+ if (!VerifyRegionInfo) return;
+
+ TopLevelRegion->verifyRegionNest();
+}
+
+// Region pass manager support.
+Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
+ BBtoRegionMap::const_iterator I = BBtoRegion.find(BB);
+ return I != BBtoRegion.end() ? I->second : 0;
+}
+
+Region *RegionInfo::operator[](BasicBlock *BB) const {
+ return getRegionFor(BB);
+}
+
+
+BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
+ BasicBlock *Exit = NULL;
+
+ while (true) {
+ // Get largest region that starts at BB.
+ Region *R = getRegionFor(BB);
+ while (R && R->getParent() && R->getParent()->getEntry() == BB)
+ R = R->getParent();
+
+ // Get the single exit of BB.
+ if (R && R->getEntry() == BB)
+ Exit = R->getExit();
+ else if (++succ_begin(BB) == succ_end(BB))
+ Exit = *succ_begin(BB);
+ else // No single exit exists.
+ return Exit;
+
+ // Get largest region that starts at Exit.
+ Region *ExitR = getRegionFor(Exit);
+ while (ExitR && ExitR->getParent()
+ && ExitR->getParent()->getEntry() == Exit)
+ ExitR = ExitR->getParent();
+
+ for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE;
+ ++PI)
+ if (!R->contains(*PI) && !ExitR->contains(*PI))
+ break;
+
+ // This stops infinite cycles.
+ if (DT->dominates(Exit, BB))
+ break;
+
+ BB = Exit;
+ }
+
+ return Exit;
+}
+
+Region*
+RegionInfo::getCommonRegion(Region *A, Region *B) const {
+ assert (A && B && "One of the Regions is NULL");
+
+ if (A->contains(B)) return A;
+
+ while (!B->contains(A))
+ B = B->getParent();
+
+ return B;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<Region*> &Regions) const {
+ Region* ret = Regions.back();
+ Regions.pop_back();
+
+ for (SmallVectorImpl<Region*>::const_iterator I = Regions.begin(),
+ E = Regions.end(); I != E; ++I)
+ ret = getCommonRegion(ret, *I);
+
+ return ret;
+}
+
+Region*
+RegionInfo::getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const {
+ Region* ret = getRegionFor(BBs.back());
+ BBs.pop_back();
+
+ for (SmallVectorImpl<BasicBlock*>::const_iterator I = BBs.begin(),
+ E = BBs.end(); I != E; ++I)
+ ret = getCommonRegion(ret, getRegionFor(*I));
+
+ return ret;
+}
+
+char RegionInfo::ID = 0;
+INITIALIZE_PASS(RegionInfo, "regions",
+ "Detect single entry single exit regions", true, true);
+
+// Create methods available outside of this file, so that they can be used
+// from "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted
+// by link-time optimization.
+
+namespace llvm {
+ FunctionPass *createRegionInfoPass() {
+ return new RegionInfo();
+ }
+}
+
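
A minimal sketch of a pass consuming this analysis (the client's name is hypothetical; it must addRequired<RegionInfo>() in its getAnalysisUsage):

    bool MyRegionClient::runOnFunction(Function &F) {
      RegionInfo &RI = getAnalysis<RegionInfo>();
      if (Region *R = RI.getRegionFor(&F.getEntryBlock()))
        dbgs() << "entry starts region " << R->getNameStr() << "\n";
      return false;
    }
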
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
new file mode 100644
index 0000000000000..fee5c1bae9764
--- /dev/null
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -0,0 +1,186 @@
+//===- RegionPrinter.cpp - Print regions tree pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Print out the region tree of a function using dotty/graphviz.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Analysis/RegionPrinter.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// onlySimpleRegions - Show only the simple regions in the RegionViewer.
+static cl::opt<bool>
+onlySimpleRegions("only-simple-regions",
+ cl::desc("Show only simple regions in the graphviz viewer"),
+ cl::Hidden,
+ cl::init(false));
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) {
+
+ if (!Node->isSubRegion()) {
+ BasicBlock *BB = Node->getNodeAs<BasicBlock>();
+
+ if (isSimple())
+ return DOTGraphTraits<const Function*>
+ ::getSimpleNodeLabel(BB, BB->getParent());
+ else
+ return DOTGraphTraits<const Function*>
+ ::getCompleteNodeLabel(BB, BB->getParent());
+ }
+
+ return "Not implemented";
+ }
+};
+
+template<>
+struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
+
+ DOTGraphTraits (bool isSimple=false)
+ : DOTGraphTraits<RegionNode*>(isSimple) {}
+
+ static std::string getGraphName(RegionInfo *DT) {
+ return "Region Graph";
+ }
+
+ std::string getNodeLabel(RegionNode *Node, RegionInfo *G) {
+ return DOTGraphTraits<RegionNode*>::getNodeLabel(Node,
+ G->getTopLevelRegion());
+ }
+
+ // Print the cluster of the subregions. This groups the single basic blocks
+ // and adds a different background color for each group.
+ static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW,
+ unsigned depth = 0) {
+ raw_ostream &O = GW.getOStream();
+ O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R)
+ << " {\n";
+ O.indent(2 * (depth + 1)) << "label = \"\";\n";
+
+ if (!onlySimpleRegions || R->isSimple()) {
+ O.indent(2 * (depth + 1)) << "style = filled;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 1) << "\n";
+
+ } else {
+ O.indent(2 * (depth + 1)) << "style = solid;\n";
+ O.indent(2 * (depth + 1)) << "color = "
+ << ((R->getDepth() * 2 % 12) + 2) << "\n";
+ }
+
+ for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
+ printRegionCluster(*RI, GW, depth + 1);
+
+ RegionInfo *RI = R->getRegionInfo();
+
+ for (Region::const_block_iterator BI = R->block_begin(),
+ BE = R->block_end(); BI != BE; ++BI) {
+ BasicBlock *BB = (*BI)->getNodeAs<BasicBlock>();
+ if (RI->getRegionFor(BB) == R)
+ O.indent(2 * (depth + 1)) << "Node"
+ << static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB))
+ << ";\n";
+ }
+
+ O.indent(2 * depth) << "}\n";
+ }
+
+ static void addCustomGraphFeatures(const RegionInfo* RI,
+ GraphWriter<RegionInfo*> &GW) {
+ raw_ostream &O = GW.getOStream();
+ O << "\tcolorscheme = \"paired12\"\n";
+ printRegionCluster(RI->getTopLevelRegion(), GW, 4);
+ }
+};
+} //end namespace llvm
+
+namespace {
+
+struct RegionViewer
+ : public DOTGraphTraitsViewer<RegionInfo, false> {
+ static char ID;
+ RegionViewer() : DOTGraphTraitsViewer<RegionInfo, false>("reg", ID){}
+};
+
+char RegionViewer::ID = 0;
+INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",
+ true, true);
+
+struct RegionOnlyViewer
+ : public DOTGraphTraitsViewer<RegionInfo, true> {
+ static char ID;
+ RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfo, true>("regonly", ID){}
+};
+
+char RegionOnlyViewer::ID = 0;
+INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",
+ "View regions of function (with no function bodies)",
+ true, true);
+
+struct RegionPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, false> {
+ static char ID;
+ RegionPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, false>("reg", ID) {}
+};
+} //end anonymous namespace
+
+char RegionPrinter::ID = 0;
+INITIALIZE_PASS(RegionPrinter, "dot-regions",
+ "Print regions of function to 'dot' file", true, true);
+
+namespace {
+
+struct RegionOnlyPrinter
+ : public DOTGraphTraitsPrinter<RegionInfo, true> {
+ static char ID;
+ RegionOnlyPrinter() :
+ DOTGraphTraitsPrinter<RegionInfo, true>("reg", ID) {}
+};
+
+}
+
+char RegionOnlyPrinter::ID = 0;
+INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only",
+ "Print regions of function to 'dot' file "
+ "(with no function bodies)",
+ true, true);
+
+FunctionPass* llvm::createRegionViewerPass() {
+ return new RegionViewer();
+}
+
+FunctionPass* llvm::createRegionOnlyViewerPass() {
+ return new RegionOnlyViewer();
+}
+
+FunctionPass* llvm::createRegionPrinterPass() {
+ return new RegionPrinter();
+}
+
+FunctionPass* llvm::createRegionOnlyPrinterPass() {
+ return new RegionOnlyPrinter();
+}
+
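
A hedged driver sketch for the printer pass above, using the legacy PassManager (M is an existing llvm::Module; the output file name is assumed to follow the DOTGraphTraitsPrinter convention, here reg.<function>.dot):

    PassManager PM;
    PM.add(createRegionPrinterPass()); // the pass opt runs for -dot-regions
    PM.run(M);
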
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 413b3b47f92a4..b892d85f9f4a2 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -103,8 +103,8 @@ MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
"derived loop"),
cl::init(100));
-static RegisterPass<ScalarEvolution>
-R("scalar-evolution", "Scalar Evolution Analysis", false, true);
+INITIALIZE_PASS(ScalarEvolution, "scalar-evolution",
+ "Scalar Evolution Analysis", false, true);
char ScalarEvolution::ID = 0;
//===----------------------------------------------------------------------===//
@@ -251,28 +251,59 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const {
OS << "(";
for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
OS << **I;
- if (next(I) != E)
+ if (llvm::next(I) != E)
OS << OpStr;
}
OS << ")";
}
bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (!getOperand(i)->dominates(BB, DT))
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ if (!(*I)->dominates(BB, DT))
return false;
- }
return true;
}
bool SCEVNAryExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (!getOperand(i)->properlyDominates(BB, DT))
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ if (!(*I)->properlyDominates(BB, DT))
return false;
- }
return true;
}
+bool SCEVNAryExpr::isLoopInvariant(const Loop *L) const {
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ if (!(*I)->isLoopInvariant(L))
+ return false;
+ return true;
+}
+
+// hasComputableLoopEvolution - N-ary expressions have computable loop
+// evolutions iff they have at least one operand that varies with the loop
+// and all varying operands are computable.
+bool SCEVNAryExpr::hasComputableLoopEvolution(const Loop *L) const {
+ bool HasVarying = false;
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
+ const SCEV *S = *I;
+ if (!S->isLoopInvariant(L)) {
+ if (S->hasComputableLoopEvolution(L))
+ HasVarying = true;
+ else
+ return false;
+ }
+ }
+ return HasVarying;
+}
+
+bool SCEVNAryExpr::hasOperand(const SCEV *O) const {
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I) {
+ const SCEV *S = *I;
+ if (O == S || S->hasOperand(O))
+ return true;
+ }
+ return false;
+}
+
bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
}
@@ -303,10 +334,14 @@ bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
if (QueryLoop->contains(L))
return false;
+ // This recurrence is invariant w.r.t. QueryLoop if L contains QueryLoop.
+ if (L->contains(QueryLoop))
+ return true;
+
// This recurrence is variant w.r.t. QueryLoop if any of its operands
// are variant.
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (!getOperand(i)->isLoopInvariant(QueryLoop))
+ for (op_iterator I = op_begin(), E = op_end(); I != E; ++I)
+ if (!(*I)->isLoopInvariant(QueryLoop))
return false;
// Otherwise it's loop-invariant.
@@ -337,12 +372,36 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
OS << ">";
}
+void SCEVUnknown::deleted() {
+ // Clear this SCEVUnknown from ValuesAtScopes.
+ SE->ValuesAtScopes.erase(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Release the value.
+ setValPtr(0);
+}
+
+void SCEVUnknown::allUsesReplacedWith(Value *New) {
+ // Clear this SCEVUnknown from ValuesAtScopes.
+ SE->ValuesAtScopes.erase(this);
+
+ // Remove this SCEVUnknown from the uniquing map.
+ SE->UniqueSCEVs.RemoveNode(this);
+
+ // Update this SCEVUnknown to point to the new value. This is needed
+ // because there may still be outstanding SCEVs which still point to
+ // this SCEVUnknown.
+ setValPtr(New);
+}
+
bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
// All non-instruction values are loop invariant. All instructions are loop
// invariant if they are not contained in the specified loop.
// Instructions are never considered invariant in the function body
// (null loop) because they are defined within the "loop".
- if (Instruction *I = dyn_cast<Instruction>(V))
+ if (Instruction *I = dyn_cast<Instruction>(getValue()))
return L && !L->contains(I);
return true;
}
@@ -360,11 +419,11 @@ bool SCEVUnknown::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
}
const Type *SCEVUnknown::getType() const {
- return V->getType();
+ return getValue()->getType();
}
bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
- if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -381,7 +440,7 @@ bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const {
}
bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
- if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -406,7 +465,7 @@ bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const {
}
bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
- if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(V))
+ if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
if (VCE->getOpcode() == Instruction::PtrToInt)
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
if (CE->getOpcode() == Instruction::GetElementPtr &&
@@ -448,166 +507,183 @@ void SCEVUnknown::print(raw_ostream &OS) const {
}
// Otherwise just print it normally.
- WriteAsOperand(OS, V, false);
+ WriteAsOperand(OS, getValue(), false);
}
//===----------------------------------------------------------------------===//
// SCEV Utilities
//===----------------------------------------------------------------------===//
-static bool CompareTypes(const Type *A, const Type *B) {
- if (A->getTypeID() != B->getTypeID())
- return A->getTypeID() < B->getTypeID();
- if (const IntegerType *AI = dyn_cast<IntegerType>(A)) {
- const IntegerType *BI = cast<IntegerType>(B);
- return AI->getBitWidth() < BI->getBitWidth();
- }
- if (const PointerType *AI = dyn_cast<PointerType>(A)) {
- const PointerType *BI = cast<PointerType>(B);
- return CompareTypes(AI->getElementType(), BI->getElementType());
- }
- if (const ArrayType *AI = dyn_cast<ArrayType>(A)) {
- const ArrayType *BI = cast<ArrayType>(B);
- if (AI->getNumElements() != BI->getNumElements())
- return AI->getNumElements() < BI->getNumElements();
- return CompareTypes(AI->getElementType(), BI->getElementType());
- }
- if (const VectorType *AI = dyn_cast<VectorType>(A)) {
- const VectorType *BI = cast<VectorType>(B);
- if (AI->getNumElements() != BI->getNumElements())
- return AI->getNumElements() < BI->getNumElements();
- return CompareTypes(AI->getElementType(), BI->getElementType());
- }
- if (const StructType *AI = dyn_cast<StructType>(A)) {
- const StructType *BI = cast<StructType>(B);
- if (AI->getNumElements() != BI->getNumElements())
- return AI->getNumElements() < BI->getNumElements();
- for (unsigned i = 0, e = AI->getNumElements(); i != e; ++i)
- if (CompareTypes(AI->getElementType(i), BI->getElementType(i)) ||
- CompareTypes(BI->getElementType(i), AI->getElementType(i)))
- return CompareTypes(AI->getElementType(i), BI->getElementType(i));
- }
- return false;
-}
-
namespace {
/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
/// than the complexity of the RHS. This comparator is used to canonicalize
/// expressions.
class SCEVComplexityCompare {
- LoopInfo *LI;
+ const LoopInfo *const LI;
public:
- explicit SCEVComplexityCompare(LoopInfo *li) : LI(li) {}
+ explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+ // Return true if LHS is less than RHS, false otherwise.
bool operator()(const SCEV *LHS, const SCEV *RHS) const {
+ return compare(LHS, RHS) < 0;
+ }
+
+ // Return negative, zero, or positive, if LHS is less than, equal to, or
+ // greater than RHS, respectively. A three-way result allows recursive
+ // comparisons to be more efficient.
+ int compare(const SCEV *LHS, const SCEV *RHS) const {
// Fast-path: SCEVs are uniqued so we can do a quick equality check.
if (LHS == RHS)
- return false;
+ return 0;
// Primarily, sort the SCEVs by their getSCEVType().
- if (LHS->getSCEVType() != RHS->getSCEVType())
- return LHS->getSCEVType() < RHS->getSCEVType();
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
// Aside from the getSCEVType() ordering, the particular ordering
// isn't very important except that it's beneficial to be consistent,
// so that (a + b) and (b + a) don't end up as different expressions.
-
- // Sort SCEVUnknown values with some loose heuristics. TODO: This is
- // not as complete as it could be.
- if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS)) {
+ switch (LType) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+ // Sort SCEVUnknown values with some loose heuristics. TODO: This is
+ // not as complete as it could be.
+ const Value *LV = LU->getValue(), *RV = RU->getValue();
+
// Order pointer values after integer values. This helps SCEVExpander
// form GEPs.
- if (LU->getType()->isPointerTy() && !RU->getType()->isPointerTy())
- return false;
- if (RU->getType()->isPointerTy() && !LU->getType()->isPointerTy())
- return true;
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
// Compare getValueID values.
- if (LU->getValue()->getValueID() != RU->getValue()->getValueID())
- return LU->getValue()->getValueID() < RU->getValue()->getValueID();
+ unsigned LID = LV->getValueID(),
+ RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
// Sort arguments by their position.
- if (const Argument *LA = dyn_cast<Argument>(LU->getValue())) {
- const Argument *RA = cast<Argument>(RU->getValue());
- return LA->getArgNo() < RA->getArgNo();
+ if (const Argument *LA = dyn_cast<Argument>(LV)) {
+ const Argument *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
}
- // For instructions, compare their loop depth, and their opcode.
- // This is pretty loose.
- if (Instruction *LV = dyn_cast<Instruction>(LU->getValue())) {
- Instruction *RV = cast<Instruction>(RU->getValue());
+ // For instructions, compare their loop depth, and their operand
+ // count. This is pretty loose.
+ if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
+ const Instruction *RInst = cast<Instruction>(RV);
// Compare loop depths.
- if (LI->getLoopDepth(LV->getParent()) !=
- LI->getLoopDepth(RV->getParent()))
- return LI->getLoopDepth(LV->getParent()) <
- LI->getLoopDepth(RV->getParent());
-
- // Compare opcodes.
- if (LV->getOpcode() != RV->getOpcode())
- return LV->getOpcode() < RV->getOpcode();
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
// Compare the number of operands.
- if (LV->getNumOperands() != RV->getNumOperands())
- return LV->getNumOperands() < RV->getNumOperands();
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
}
- return false;
+ return 0;
}
- // Compare constant values.
- if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) {
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
const SCEVConstant *RC = cast<SCEVConstant>(RHS);
- if (LC->getValue()->getBitWidth() != RC->getValue()->getBitWidth())
- return LC->getValue()->getBitWidth() < RC->getValue()->getBitWidth();
- return LC->getValue()->getValue().ult(RC->getValue()->getValue());
+
+ // Compare constant values.
+ const APInt &LA = LC->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
}
- // Compare addrec loop depths.
- if (const SCEVAddRecExpr *LA = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
- if (LA->getLoop()->getLoopDepth() != RA->getLoop()->getLoopDepth())
- return LA->getLoop()->getLoopDepth() < RA->getLoop()->getLoopDepth();
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(),
+ RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
+ }
+
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ long X = compare(LA->getOperand(i), RA->getOperand(i));
+ if (X != 0)
+ return X;
+ }
+
+ return 0;
}
- // Lexicographically compare n-ary expressions.
- if (const SCEVNAryExpr *LC = dyn_cast<SCEVNAryExpr>(LHS)) {
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
- for (unsigned i = 0, e = LC->getNumOperands(); i != e; ++i) {
- if (i >= RC->getNumOperands())
- return false;
- if (operator()(LC->getOperand(i), RC->getOperand(i)))
- return true;
- if (operator()(RC->getOperand(i), LC->getOperand(i)))
- return false;
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ long X = compare(LC->getOperand(i), RC->getOperand(i));
+ if (X != 0)
+ return X;
}
- return LC->getNumOperands() < RC->getNumOperands();
+ return (int)LNumOps - (int)RNumOps;
}
- // Lexicographically compare udiv expressions.
- if (const SCEVUDivExpr *LC = dyn_cast<SCEVUDivExpr>(LHS)) {
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
- if (operator()(LC->getLHS(), RC->getLHS()))
- return true;
- if (operator()(RC->getLHS(), LC->getLHS()))
- return false;
- if (operator()(LC->getRHS(), RC->getRHS()))
- return true;
- if (operator()(RC->getRHS(), LC->getRHS()))
- return false;
- return false;
+
+ // Lexicographically compare udiv expressions.
+ long X = compare(LC->getLHS(), RC->getLHS());
+ if (X != 0)
+ return X;
+ return compare(LC->getRHS(), RC->getRHS());
}
- // Compare cast expressions by operand.
- if (const SCEVCastExpr *LC = dyn_cast<SCEVCastExpr>(LHS)) {
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
- return operator()(LC->getOperand(), RC->getOperand());
+
+ // Compare cast expressions by operand.
+ return compare(LC->getOperand(), RC->getOperand());
+ }
+
+ default:
+ break;
}
llvm_unreachable("Unknown SCEV kind!");
- return false;
+ return 0;
}
};
}
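
The point of returning an int shows in the lexicographic loops above: one recursive traversal decides less/equal/greater, whereas a boolean predicate needs two calls per operand pair just to detect equality. A self-contained toy with the same shape (illustrative only, not LLVM code):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    static int threeWay(int a, int b) { return a < b ? -1 : (a > b ? 1 : 0); }

    static int lexCompare(const std::vector<int> &L, const std::vector<int> &R) {
      for (std::size_t i = 0, e = std::min(L.size(), R.size()); i != e; ++i)
        if (int X = threeWay(L[i], R[i]))
          return X;                          // first difference decides
      return (int)L.size() - (int)R.size();  // shorter sequence sorts first
    }
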
@@ -628,8 +704,9 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
if (Ops.size() == 2) {
// This is the common case, which also happens to be trivially simple.
// Special case it.
- if (SCEVComplexityCompare(LI)(Ops[1], Ops[0]))
- std::swap(Ops[0], Ops[1]);
+ const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
+ if (SCEVComplexityCompare(LI)(RHS, LHS))
+ std::swap(LHS, RHS);
return;
}
@@ -845,6 +922,13 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
return getAddRecExpr(Operands, AddRec->getLoop());
}
+ // As a special case, fold trunc(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
// The cast wasn't folded; create an explicit cast node. We can reuse
// the existing insert position since if we get here, we won't have
// made any changes which would invalidate it.
@@ -1163,6 +1247,13 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
return getAddRecExpr(Ops, AR->getLoop());
}
+ // As a special case, fold anyext(undef) to undef. We don't want to
+ // know too much about SCEVUnknowns, but this special case is handy
+ // and harmless.
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Op))
+ if (isa<UndefValue>(U->getValue()))
+ return getSCEV(UndefValue::get(Ty));
+
// If the expression is obviously signed, use the sext cast value.
if (isa<SCEVSMaxExpr>(Op))
return SExt;
@@ -1287,8 +1378,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// If HasNSW is true and all the operands are non-negative, infer HasNUW.
if (!HasNUW && HasNSW) {
bool All = true;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (!isKnownNonNegative(Ops[i])) {
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
All = false;
break;
}
@@ -1321,22 +1413,29 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Ops.size() == 1) return Ops[0];
}
- // Okay, check to see if the same value occurs in the operand list twice. If
- // so, merge them together into an multiply expression. Since we sorted the
- // list, these values are required to be adjacent.
+ // Okay, check to see if the same value occurs in the operand list more than
+ // once. If so, merge them together into a multiply expression. Since we
+ // sorted the list, these values are required to be adjacent.
const Type *Ty = Ops[0]->getType();
- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+ bool FoundMatch = false;
+ for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2
- // Found a match, merge the two values into a multiply, and add any
- // remaining values to the result.
- const SCEV *Two = getConstant(Ty, 2);
- const SCEV *Mul = getMulExpr(Ops[i], Two);
- if (Ops.size() == 2)
+ // Scan ahead to count how many equal operands there are.
+ unsigned Count = 2;
+ while (i+Count != e && Ops[i+Count] == Ops[i])
+ ++Count;
+ // Merge the values into a multiply.
+ const SCEV *Scale = getConstant(Ty, Count);
+ const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+ if (Ops.size() == Count)
return Mul;
- Ops.erase(Ops.begin()+i, Ops.begin()+i+2);
- Ops.push_back(Mul);
- return getAddExpr(Ops, HasNUW, HasNSW);
+ Ops[i] = Mul;
+ Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+ --i; e -= Count - 1;
+ FoundMatch = true;
}
+ if (FoundMatch)
+ return getAddExpr(Ops, HasNUW, HasNSW);
// Check for truncates. If all the operands are truncated from the same
// type, see if factoring out the truncate would permit the result to be
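// A side sketch, not part of the patch: the run-collapsing idea above on a
// plain sorted vector. Sorting makes equal operands adjacent, so one pass
// can turn each run of N copies of X into a single (X, N) term -- read it
// as N*X -- instead of restarting the simplification after every pairwise
// merge. All names and types here are illustrative stand-ins for SCEVs.
#include <utility>
#include <vector>
static std::vector<std::pair<int, unsigned> >
CollapseRuns(const std::vector<int> &Ops) { // Ops is assumed sorted
  std::vector<std::pair<int, unsigned> > Result;
  for (size_t i = 0, e = Ops.size(); i != e; ) {
    size_t Count = 1;
    while (i + Count != e && Ops[i + Count] == Ops[i])
      ++Count;                              // scan ahead over the run
    Result.push_back(std::make_pair(Ops[i], unsigned(Count)));
    i += Count;
  }
  return Result;
}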
@@ -1433,7 +1532,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// re-generate the operands list. Group the operands by constant scale,
// to avoid multiplying by the same constant scale multiple times.
std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
- for (SmallVector<const SCEV *, 8>::iterator I = NewOps.begin(),
+ for (SmallVector<const SCEV *, 8>::const_iterator I = NewOps.begin(),
E = NewOps.end(); I != E; ++I)
MulOpLists[M.find(*I)->second].push_back(*I);
// Re-generate the operands list.
@@ -1460,20 +1559,23 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+ if (isa<SCEVConstant>(MulOpSCEV))
+ continue;
for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
- if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) {
+ if (MulOpSCEV == Ops[AddOp]) {
// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
// If the multiply has more than two operands, we must get the
// Y*Z term.
- SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), Mul->op_end());
- MulOps.erase(MulOps.begin()+MulOp);
+ SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul = getMulExpr(MulOps);
}
const SCEV *One = getConstant(Ty, 1);
- const SCEV *AddOne = getAddExpr(InnerMul, One);
- const SCEV *OuterMul = getMulExpr(AddOne, Ops[AddOp]);
+ const SCEV *AddOne = getAddExpr(One, InnerMul);
+ const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
if (Ops.size() == 2) return OuterMul;
if (AddOp < Idx) {
Ops.erase(Ops.begin()+AddOp);
@@ -1500,15 +1602,15 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
- Mul->op_end());
- MulOps.erase(MulOps.begin()+MulOp);
+ Mul->op_begin()+MulOp);
+ MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
InnerMul1 = getMulExpr(MulOps);
}
const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
- OtherMul->op_end());
- MulOps.erase(MulOps.begin()+OMulOp);
+ OtherMul->op_begin()+OMulOp);
+ MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
InnerMul2 = getMulExpr(MulOps);
}
const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
@@ -1574,30 +1676,31 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// there are multiple AddRec's with the same loop induction variable being
// added together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
- OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx)
- if (OtherIdx != Idx) {
- const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
- if (AddRecLoop == OtherAddRec->getLoop()) {
- // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D}
- SmallVector<const SCEV *, 4> NewOps(AddRec->op_begin(),
- AddRec->op_end());
- for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
- if (i >= NewOps.size()) {
- NewOps.append(OtherAddRec->op_begin()+i,
- OtherAddRec->op_end());
- break;
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
+ SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+ i != e; ++i) {
+ if (i >= AddRecOps.size()) {
+ AddRecOps.append(OtherAddRec->op_begin()+i,
+ OtherAddRec->op_end());
+ break;
+ }
+ AddRecOps[i] = getAddExpr(AddRecOps[i],
+ OtherAddRec->getOperand(i));
+ }
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
}
- NewOps[i] = getAddExpr(NewOps[i], OtherAddRec->getOperand(i));
- }
- const SCEV *NewAddRec = getAddRecExpr(NewOps, AddRecLoop);
-
- if (Ops.size() == 2) return NewAddRec;
-
- Ops.erase(Ops.begin()+Idx);
- Ops.erase(Ops.begin()+OtherIdx-1);
- Ops.push_back(NewAddRec);
- return getAddExpr(Ops);
- }
+ Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop);
+ return getAddExpr(Ops);
}
// Otherwise couldn't fold anything into this recurrence. Move onto the
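// A side sketch, not part of the patch: the elementwise merge the loop
// above performs on AddRecOps, shown with plain integers standing in for
// SCEV operands. {A0,+,A1,...}<L> + {B0,+,B1,...}<L> adds the two operand
// lists pairwise; the tail of the longer list carries over unchanged.
#include <vector>
static std::vector<long> AddAddRecOps(std::vector<long> A,
                                      const std::vector<long> &B) {
  if (A.size() < B.size())
    A.resize(B.size(), 0);      // missing high-order operands act as zero
  for (size_t i = 0, e = B.size(); i != e; ++i)
    A[i] += B[i];
  return A;
}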
@@ -1633,17 +1736,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
assert(!Ops.empty() && "Cannot get empty mul!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Ops[i]->getType()) ==
- getEffectiveSCEVType(Ops[0]->getType()) &&
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVMulExpr operand types don't match!");
#endif
// If HasNSW is true and all the operands are non-negative, infer HasNUW.
if (!HasNUW && HasNSW) {
bool All = true;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (!isKnownNonNegative(Ops[i])) {
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Ops.begin(),
+ E = Ops.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
All = false;
break;
}
@@ -1740,8 +1844,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+ const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
+ if (Ops[i]->isLoopInvariant(AddRecLoop)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -1758,7 +1863,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// Build the new addrec. Propagate the NUW and NSW flags if both the
// outer mul and the inner addrec are guaranteed to have no overflow.
- const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(),
+ const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop,
HasNUW && AddRec->hasNoUnsignedWrap(),
HasNSW && AddRec->hasNoSignedWrap());
@@ -1778,28 +1883,30 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// there are multiple AddRec's with the same loop induction variable being
// multiplied together. If so, we can fold them.
for (unsigned OtherIdx = Idx+1;
- OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);++OtherIdx)
- if (OtherIdx != Idx) {
- const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
- if (AddRec->getLoop() == OtherAddRec->getLoop()) {
- // F * G --> {A,+,B} * {C,+,D} --> {A*C,+,F*D + G*B + B*D}
- const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
- const SCEV *NewStart = getMulExpr(F->getStart(),
- G->getStart());
- const SCEV *B = F->getStepRecurrence(*this);
- const SCEV *D = G->getStepRecurrence(*this);
- const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
- getMulExpr(G, B),
- getMulExpr(B, D));
- const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
- F->getLoop());
- if (Ops.size() == 2) return NewAddRec;
-
- Ops.erase(Ops.begin()+Idx);
- Ops.erase(Ops.begin()+OtherIdx-1);
- Ops.push_back(NewAddRec);
- return getMulExpr(Ops);
- }
+ OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+ // F * G, where F = {A,+,B}<L> and G = {C,+,D}<L> -->
+ // {A*C,+,F*D + G*B + B*D}<L>
+ for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+ ++OtherIdx)
+ if (const SCEVAddRecExpr *OtherAddRec =
+ dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+ if (OtherAddRec->getLoop() == AddRecLoop) {
+ const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec;
+ const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart());
+ const SCEV *B = F->getStepRecurrence(*this);
+ const SCEV *D = G->getStepRecurrence(*this);
+ const SCEV *NewStep = getAddExpr(getMulExpr(F, D),
+ getMulExpr(G, B),
+ getMulExpr(B, D));
+ const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep,
+ F->getLoop());
+ if (Ops.size() == 2) return NewAddRec;
+ Ops[Idx] = AddRec = cast<SCEVAddRecExpr>(NewAddRec);
+ Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+ }
+ return getMulExpr(Ops);
}
// Otherwise couldn't fold anything into this recurrence. Move onto the
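// A side derivation, not part of the patch, of the rule used above: with
// F(n) = A + B*n and G(n) = C + D*n, the first difference of the product is
//   F(n+1)*G(n+1) - F(n)*G(n) = (F(n)+B)*(G(n)+D) - F(n)*G(n)
//                             = F(n)*D + G(n)*B + B*D,
// which is exactly the step expression the code builds. A numeric check:
#include <cassert>
static void CheckAddRecProduct() {
  const long A = 5, B = 3, C = 7, D = 2;
  long F = A, G = C, P = A * C;  // P tracks {A*C,+,F*D + G*B + B*D}
  for (int n = 0; n != 10; ++n) {
    P += F * D + G * B + B * D;  // apply the derived step at trip n
    F += B; G += D;              // advance both recurrences one trip
    assert(P == F * G);          // the product stays in sync
  }
}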
@@ -1848,7 +1955,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// TODO: Generalize this to non-constants by using known-bits information.
const Type *Ty = LHS->getType();
unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros();
- unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ;
+ unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
if (!RHSC->getValue()->getValue().isPowerOf2())
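// A side note, not part of the patch: the new "- 1" is the usual log2
// identity for a W-bit power of two C, log2(C) == W - countLeadingZeros(C)
// - 1. For W == 32 and C == 8, LZ == 28, so the shift amount is
// 32 - 28 - 1 == 3. A one-line check (assuming the GCC/Clang builtin):
static unsigned Log2OfPow2(unsigned C) {  // C is assumed a power of two
  return 32u - __builtin_clz(C) - 1u;     // e.g. Log2OfPow2(8) == 3
}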
@@ -1955,9 +2062,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
bool HasNUW, bool HasNSW) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Operands[0]->getType());
for (unsigned i = 1, e = Operands.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Operands[i]->getType()) ==
- getEffectiveSCEVType(Operands[0]->getType()) &&
+ assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy &&
"SCEVAddRecExpr operand types don't match!");
#endif
@@ -1975,8 +2082,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// If HasNSW is true and all the operands are non-negative, infer HasNUW.
if (!HasNUW && HasNSW) {
bool All = true;
- for (unsigned i = 0, e = Operands.size(); i != e; ++i)
- if (!isKnownNonNegative(Operands[i])) {
+ for (SmallVectorImpl<const SCEV *>::const_iterator I = Operands.begin(),
+ E = Operands.end(); I != E; ++I)
+ if (!isKnownNonNegative(*I)) {
All = false;
break;
}
@@ -1986,9 +2094,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
// Canonicalize nested AddRecs by nesting them in order of loop depth.
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop *NestedLoop = NestedAR->getLoop();
- if (L->contains(NestedLoop->getHeader()) ?
+ if (L->contains(NestedLoop) ?
(L->getLoopDepth() < NestedLoop->getLoopDepth()) :
- (!NestedLoop->contains(L->getHeader()) &&
+ (!NestedLoop->contains(L) &&
DT->dominates(L->getHeader(), NestedLoop->getHeader()))) {
SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),
NestedAR->op_end());
@@ -2055,9 +2163,9 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty smax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Ops[i]->getType()) ==
- getEffectiveSCEVType(Ops[0]->getType()) &&
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVSMaxExpr operand types don't match!");
#endif
@@ -2160,9 +2268,9 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
assert(!Ops.empty() && "Cannot get empty umax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Ops[i]->getType()) ==
- getEffectiveSCEVType(Ops[0]->getType()) &&
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVUMaxExpr operand types don't match!");
#endif
@@ -2326,8 +2434,14 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
ID.AddInteger(scUnknown);
ID.AddPointer(V);
void *IP = 0;
- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
- SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V);
+ if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+ assert(cast<SCEVUnknown>(S)->getValue() == V &&
+ "Stale SCEVUnknown in uniquing map!");
+ return S;
+ }
+ SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+ FirstUnknown);
+ FirstUnknown = cast<SCEVUnknown>(S);
UniqueSCEVs.InsertNode(S, IP);
return S;
}
@@ -2391,10 +2505,15 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
const SCEV *ScalarEvolution::getSCEV(Value *V) {
assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
- std::map<SCEVCallbackVH, const SCEV *>::iterator I = Scalars.find(V);
- if (I != Scalars.end()) return I->second;
+ ValueExprMapType::const_iterator I = ValueExprMap.find(V);
+ if (I != ValueExprMap.end()) return I->second;
const SCEV *S = createSCEV(V);
- Scalars.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+
+ // The process of creating a SCEV for V may have caused other SCEVs
+ // to be created, so it's necessary to insert the new entry
+ // from scratch, rather than trying to remember the insert position
+ // above.
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
return S;
}
@@ -2428,6 +2547,10 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
///
const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
const SCEV *RHS) {
+ // Fast path: X - X --> 0.
+ if (LHS == RHS)
+ return getConstant(LHS->getType(), 0);
+
// X - Y --> X + -Y
return getAddExpr(LHS, getNegativeSCEV(RHS));
}
@@ -2570,12 +2693,12 @@ PushDefUseChildren(Instruction *I,
// Push the def-use children onto the Worklist stack.
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI)
- Worklist.push_back(cast<Instruction>(UI));
+ Worklist.push_back(cast<Instruction>(*UI));
}
/// ForgetSymbolicValue - This looks up computed SCEV values for all
/// instructions that depend on the given instruction and removes them from
-/// the Scalars map if they reference SymName. This is used during PHI
+/// the ValueExprMap if they reference SymName. This is used during PHI
/// resolution.
void
ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
@@ -2588,9 +2711,9 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- std::map<SCEVCallbackVH, const SCEV *>::iterator It =
- Scalars.find(static_cast<Value *>(I));
- if (It != Scalars.end()) {
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
// Short-circuit the def-use traversal if the symbolic name
// ceases to appear in expressions.
if (It->second != SymName && !It->second->hasOperand(SymName))
@@ -2607,7 +2730,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
!isa<SCEVUnknown>(It->second) ||
(I != PN && It->second == SymName)) {
ValuesAtScopes.erase(It->second);
- Scalars.erase(It);
+ ValueExprMap.erase(It);
}
}
@@ -2644,9 +2767,9 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (BEValueV && StartValueV) {
// While we are analyzing this PHI node, handle its value symbolically.
const SCEV *SymbolicName = getUnknown(PN);
- assert(Scalars.find(PN) == Scalars.end() &&
+ assert(ValueExprMap.find(PN) == ValueExprMap.end() &&
"PHI node already processed?");
- Scalars.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
+ ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName));
// Using this symbolic name for the PHI, analyze the value coming around
// the back-edge.
@@ -2707,7 +2830,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
ForgetSymbolicName(PN, SymbolicName);
- Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
@@ -2732,7 +2855,7 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// to be symbolic. We now need to go back and purge all of the
// entries for the scalars that use the symbolic expression.
ForgetSymbolicName(PN, SymbolicName);
- Scalars[SCEVCallbackVH(PN, this)] = PHISCEV;
+ ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
return PHISCEV;
}
}
@@ -2777,7 +2900,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
return getUnknown(GEP);
const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
gep_type_iterator GTI = gep_type_begin(GEP);
- for (GetElementPtrInst::op_iterator I = next(GEP->op_begin()),
+ for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
E = GEP->op_end();
I != E; ++I) {
Value *Index = *I;
@@ -3200,12 +3323,42 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
Operator *U = cast<Operator>(V);
switch (Opcode) {
- case Instruction::Add:
- return getAddExpr(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)));
- case Instruction::Mul:
- return getMulExpr(getSCEV(U->getOperand(0)),
- getSCEV(U->getOperand(1)));
+ case Instruction::Add: {
+ // The simple thing to do would be to just call getSCEV on both operands
+ // and call getAddExpr with the result. However if we're looking at a
+ // bunch of things all added together, this can be quite inefficient,
+ // because it leads to N-1 getAddExpr calls for N ultimate operands.
+ // Instead, gather up all the operands and make a single getAddExpr call.
+ // LLVM IR canonical form means we need only traverse the left operands.
+ SmallVector<const SCEV *, 4> AddOps;
+ AddOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
+ unsigned Opcode = Op->getValueID() - Value::InstructionVal;
+ if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+ break;
+ U = cast<Operator>(Op);
+ const SCEV *Op1 = getSCEV(U->getOperand(1));
+ if (Opcode == Instruction::Sub)
+ AddOps.push_back(getNegativeSCEV(Op1));
+ else
+ AddOps.push_back(Op1);
+ }
+ AddOps.push_back(getSCEV(U->getOperand(0)));
+ return getAddExpr(AddOps);
+ }
+ case Instruction::Mul: {
+ // See the Add code above.
+ SmallVector<const SCEV *, 4> MulOps;
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ for (Value *Op = U->getOperand(0);
+ Op->getValueID() == Instruction::Mul + Value::InstructionVal;
+ Op = U->getOperand(0)) {
+ U = cast<Operator>(Op);
+ MulOps.push_back(getSCEV(U->getOperand(1)));
+ }
+ MulOps.push_back(getSCEV(U->getOperand(0)));
+ return getMulExpr(MulOps);
+ }
case Instruction::UDiv:
return getUDivExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
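// A side sketch, not part of the patch: the left-spine walk above, on a toy
// expression tree. LLVM IR canonicalization tends to produce left-leaning
// chains like (((a+b)+c)+d), so pushing the right operand at each level and
// finally the bottom-most left operand collects all N leaves for a single
// N-ary call instead of N-1 binary ones.
#include <vector>
struct Expr {               // illustrative stand-in for an Add instruction
  Expr *LHS, *RHS;          // both null for a leaf
  int Value;                // leaf payload
};
static void GatherAddOperands(Expr *E, std::vector<Expr *> &Ops) {
  while (E->LHS) {          // descend the left spine of the chain
    Ops.push_back(E->RHS);
    E = E->LHS;
  }
  Ops.push_back(E);         // the final, bottom-most operand
}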
@@ -3467,7 +3620,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
const SCEV *LDiff = getMinusSCEV(LA, LS);
const SCEV *RDiff = getMinusSCEV(RA, One);
if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(LS, One), LDiff);
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
case ICmpInst::ICMP_EQ:
@@ -3482,7 +3635,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
const SCEV *LDiff = getMinusSCEV(LA, One);
const SCEV *RDiff = getMinusSCEV(RA, LS);
if (LDiff == RDiff)
- return getAddExpr(getUMaxExpr(LS, One), LDiff);
+ return getAddExpr(getUMaxExpr(One, LS), LDiff);
}
break;
default:
@@ -3579,9 +3732,9 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- std::map<SCEVCallbackVH, const SCEV *>::iterator It =
- Scalars.find(static_cast<Value *>(I));
- if (It != Scalars.end()) {
+ ValueExprMapType::iterator It =
+ ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
// SCEVUnknown for a PHI either means that it has an unrecognized
// structure, or it's a PHI that's in the progress of being computed
// by createNodeForPHI. In the former case, additional loop trip
@@ -3590,7 +3743,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
ValuesAtScopes.erase(It->second);
- Scalars.erase(It);
+ ValueExprMap.erase(It);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
@@ -3619,11 +3772,10 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- std::map<SCEVCallbackVH, const SCEV *>::iterator It =
- Scalars.find(static_cast<Value *>(I));
- if (It != Scalars.end()) {
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
ValuesAtScopes.erase(It->second);
- Scalars.erase(It);
+ ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
@@ -3648,35 +3800,14 @@ void ScalarEvolution::forgetValue(Value *V) {
I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
- std::map<SCEVCallbackVH, const SCEV *>::iterator It =
- Scalars.find(static_cast<Value *>(I));
- if (It != Scalars.end()) {
+ ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
ValuesAtScopes.erase(It->second);
- Scalars.erase(It);
+ ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
- // If there's a SCEVUnknown tying this value into the SCEV
- // space, remove it from the folding set map. The SCEVUnknown
- // object and any other SCEV objects which reference it
- // (transitively) remain allocated, effectively leaked until
- // the underlying BumpPtrAllocator is freed.
- //
- // This permits SCEV pointers to be used as keys in maps
- // such as the ValuesAtScopes map.
- FoldingSetNodeID ID;
- ID.AddInteger(scUnknown);
- ID.AddPointer(I);
- void *IP;
- if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
- UniqueSCEVs.RemoveNode(S);
-
- // This isn't necessary, but we might as well remove the
- // value from the ValuesAtScopes map too.
- ValuesAtScopes.erase(S);
- }
-
PushDefUseChildren(I, Worklist);
}
}
@@ -3816,14 +3947,13 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
else
MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
} else {
- // Both conditions must be true for the loop to exit.
+ // Both conditions must be true at the same time for the loop to exit.
+ // For now, be conservative.
assert(L->contains(FBB) && "Loop block has no successor in loop!");
- if (BTI0.Exact != getCouldNotCompute() &&
- BTI1.Exact != getCouldNotCompute())
- BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
- if (BTI0.Max != getCouldNotCompute() &&
- BTI1.Max != getCouldNotCompute())
- MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
}
return BackedgeTakenInfo(BECount, MaxBECount);
@@ -3851,14 +3981,13 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
else
MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
} else {
- // Both conditions must be false for the loop to exit.
+ // Both conditions must be false at the same time for the loop to exit.
+ // For now, be conservative.
assert(L->contains(TBB) && "Loop block has no successor in loop!");
- if (BTI0.Exact != getCouldNotCompute() &&
- BTI1.Exact != getCouldNotCompute())
- BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
- if (BTI0.Max != getCouldNotCompute() &&
- BTI1.Max != getCouldNotCompute())
- MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ if (BTI0.Max == BTI1.Max)
+ MaxBECount = BTI0.Max;
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
}
return BackedgeTakenInfo(BECount, MaxBECount);
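// A side note, not part of the patch: the umax of the two exact counts,
// removed in the two hunks above, was only valid for monotone exit
// conditions. A toy counterexample where each condition first holds at a
// different trip, but both only hold together later than their umax:
#include <cassert>
static void CheckNonMonotoneExit() {
  // cond0 holds on even trips (first at 0); cond1 holds for trips >= 3
  // (first at 3). The loop exits the first time both hold, at trip 4,
  // not at umax(0, 3) == 3.
  int Trip = 0;
  while (!(Trip % 2 == 0 && Trip >= 3))
    ++Trip;
  assert(Trip == 4);
}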
@@ -4203,7 +4332,7 @@ Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
const APInt &BEs,
const Loop *L) {
- std::map<PHINode*, Constant*>::iterator I =
+ std::map<PHINode*, Constant*>::const_iterator I =
ConstantEvolutionLoopExitValue.find(PN);
if (I != ConstantEvolutionLoopExitValue.end())
return I->second;
@@ -5185,7 +5314,8 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
LoopContinuePredicate->isUnconditional())
return false;
- return isImpliedCond(LoopContinuePredicate->getCondition(), Pred, LHS, RHS,
+ return isImpliedCond(Pred, LHS, RHS,
+ LoopContinuePredicate->getCondition(),
LoopContinuePredicate->getSuccessor(0) != L->getHeader());
}
@@ -5214,7 +5344,8 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
LoopEntryPredicate->isUnconditional())
continue;
- if (isImpliedCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
+ if (isImpliedCond(Pred, LHS, RHS,
+ LoopEntryPredicate->getCondition(),
LoopEntryPredicate->getSuccessor(0) != Pair.second))
return true;
}
@@ -5224,24 +5355,24 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
/// isImpliedCond - Test whether the condition described by Pred, LHS,
/// and RHS is true whenever the given Cond value evaluates to true.
-bool ScalarEvolution::isImpliedCond(Value *CondValue,
- ICmpInst::Predicate Pred,
+bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
+ Value *FoundCondValue,
bool Inverse) {
// Recursively handle And and Or conditions.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
if (BO->getOpcode() == Instruction::And) {
if (!Inverse)
- return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
- isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
} else if (BO->getOpcode() == Instruction::Or) {
if (Inverse)
- return isImpliedCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
- isImpliedCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+ return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
+ isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
}
}
- ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
+ ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
if (!ICI) return false;
// Bail if the ICmp's operands' types are wider than the needed type
@@ -5658,20 +5789,19 @@ void ScalarEvolution::SCEVCallbackVH::deleted() {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- SE->Scalars.erase(getValPtr());
+ SE->ValueExprMap.erase(getValPtr());
// this now dangles!
}
-void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
+void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
// Forget all the expressions associated with users of the old value,
// so that future queries will recompute the expressions using the new
// value.
+ Value *Old = getValPtr();
SmallVector<User *, 16> Worklist;
SmallPtrSet<User *, 8> Visited;
- Value *Old = getValPtr();
- bool DeleteOld = false;
for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end();
UI != UE; ++UI)
Worklist.push_back(*UI);
@@ -5679,27 +5809,22 @@ void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *) {
User *U = Worklist.pop_back_val();
// Deleting the Old value will cause this to dangle. Postpone
// that until everything else is done.
- if (U == Old) {
- DeleteOld = true;
+ if (U == Old)
continue;
- }
if (!Visited.insert(U))
continue;
if (PHINode *PN = dyn_cast<PHINode>(U))
SE->ConstantEvolutionLoopExitValue.erase(PN);
- SE->Scalars.erase(U);
+ SE->ValueExprMap.erase(U);
for (Value::use_iterator UI = U->use_begin(), UE = U->use_end();
UI != UE; ++UI)
Worklist.push_back(*UI);
}
- // Delete the Old value if it (indirectly) references itself.
- if (DeleteOld) {
- if (PHINode *PN = dyn_cast<PHINode>(Old))
- SE->ConstantEvolutionLoopExitValue.erase(PN);
- SE->Scalars.erase(Old);
- // this now dangles!
- }
- // this may dangle!
+ // Delete the Old value.
+ if (PHINode *PN = dyn_cast<PHINode>(Old))
+ SE->ConstantEvolutionLoopExitValue.erase(PN);
+ SE->ValueExprMap.erase(Old);
+ // this now dangles!
}
ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
@@ -5710,7 +5835,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(&ID) {
+ : FunctionPass(ID), FirstUnknown(0) {
}
bool ScalarEvolution::runOnFunction(Function &F) {
@@ -5722,7 +5847,13 @@ bool ScalarEvolution::runOnFunction(Function &F) {
}
void ScalarEvolution::releaseMemory() {
- Scalars.clear();
+ // Iterate through all the SCEVUnknown instances and call their
+ // destructors, so that they release their references to their values.
+ for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
+ U->~SCEVUnknown();
+ FirstUnknown = 0;
+
+ ValueExprMap.clear();
BackedgeTakenCounts.clear();
ConstantEvolutionLoopExitValue.clear();
ValuesAtScopes.clear();
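// A side sketch, not part of the patch: SCEVUnknowns are placement-new'd
// into a BumpPtrAllocator, so nothing ever deletes them individually; any
// nontrivial destructor work (here, releasing the value handles) must be
// run by hand. An intrusive singly-linked list threaded through the
// objects, as FirstUnknown/Next do above, makes that walk cheap:
struct Unknown {
  Unknown *Next;               // intrusive list link, set at creation
  ~Unknown() { /* release external references here */ }
};
static void DestroyAll(Unknown *&First) {
  for (Unknown *U = First; U; ) {
    Unknown *N = U->Next;      // read the link before destroying
    U->~Unknown();             // destructor only; the memory itself
    U = N;                     // stays with the allocator
  }
  First = 0;
}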
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 58711b8be59e8..93b2a8b06fbe9 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -34,14 +34,14 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
- ScalarEvolutionAliasAnalysis() : FunctionPass(&ID), SE(0) {}
+ ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {}
/// getAdjustedAnalysisPointer - This method is used when a pass implements
/// an analysis interface through multiple inheritance. If needed, it
/// should override this to adjust the this pointer as needed for the
/// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
+ virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+ if (PI == &AliasAnalysis::ID)
return (AliasAnalysis*)this;
return this;
}
@@ -58,11 +58,8 @@ namespace {
// Register this pass...
char ScalarEvolutionAliasAnalysis::ID = 0;
-static RegisterPass<ScalarEvolutionAliasAnalysis>
-X("scev-aa", "ScalarEvolution-based Alias Analysis", false, true);
-
-// Declare that we implement the AliasAnalysis interface
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+INITIALIZE_AG_PASS(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+ "ScalarEvolution-based Alias Analysis", false, true, false);
FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
return new ScalarEvolutionAliasAnalysis();
@@ -158,8 +155,8 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
Value *AO = GetBaseValue(AS);
Value *BO = GetBaseValue(BS);
if ((AO && AO != A) || (BO && BO != B))
- if (alias(AO ? AO : A, AO ? ~0u : ASize,
- BO ? BO : B, BO ? ~0u : BSize) == NoAlias)
+ if (alias(AO ? AO : A, AO ? UnknownSize : ASize,
+ BO ? BO : B, BO ? UnknownSize : BSize) == NoAlias)
return NoAlias;
// Forward the query to the next analysis.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index d4a4b26e25ec4..66a06aeac43ca 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -647,6 +647,11 @@ public:
bool operator()(std::pair<const Loop *, const SCEV *> LHS,
std::pair<const Loop *, const SCEV *> RHS) const {
+ // Keep pointer operands sorted at the end.
+ if (LHS.second->getType()->isPointerTy() !=
+ RHS.second->getType()->isPointerTy())
+ return LHS.second->getType()->isPointerTy();
+
// Compare loops with PickMostRelevantLoop.
if (LHS.first != RHS.first)
return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
@@ -699,8 +704,15 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// The running sum expression is a pointer. Try to form a getelementptr
// at this level with that as the base.
SmallVector<const SCEV *, 4> NewOps;
- for (; I != E && I->first == CurLoop; ++I)
- NewOps.push_back(I->second);
+ for (; I != E && I->first == CurLoop; ++I) {
+ // If the operand is a SCEVUnknown and not an instruction, peek through
+ // it, to enable more of it to be folded into the GEP.
+ const SCEV *X = I->second;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
+ if (!isa<Instruction>(U->getValue()))
+ X = SE.getSCEV(U->getValue());
+ NewOps.push_back(X);
+ }
Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
} else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
// The running sum is an integer, and there's a pointer at this level.
@@ -1047,9 +1059,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// First check for an existing canonical IV in a suitable type.
PHINode *CanonicalIV = 0;
if (PHINode *PN = L->getCanonicalInductionVariable())
- if (SE.isSCEVable(PN->getType()) &&
- SE.getEffectiveSCEVType(PN->getType())->isIntegerTy() &&
- SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
+ if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
CanonicalIV = PN;
// Rewrite an AddRec in terms of the canonical induction variable, if
@@ -1102,21 +1112,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
SE.getUnknown(expand(Rest))));
}
- // {0,+,1} --> Insert a canonical induction variable into the loop!
- if (S->isAffine() && S->getOperand(1)->isOne()) {
- // If there's a canonical IV, just use it.
- if (CanonicalIV) {
- assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
- "IVs with types different from the canonical IV should "
- "already have been handled!");
- return CanonicalIV;
- }
-
+ // If we don't yet have a canonical IV, create one.
+ if (!CanonicalIV) {
// Create and insert the PHI node for the induction variable in the
// specified loop.
BasicBlock *Header = L->getHeader();
- PHINode *PN = PHINode::Create(Ty, "indvar", Header->begin());
- rememberInstruction(PN);
+ CanonicalIV = PHINode::Create(Ty, "indvar", Header->begin());
+ rememberInstruction(CanonicalIV);
Constant *One = ConstantInt::get(Ty, 1);
for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
@@ -1125,40 +1127,45 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
if (L->contains(HP)) {
// Insert a unit add instruction right before the terminator
// corresponding to the back-edge.
- Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
- HP->getTerminator());
+ Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
+ "indvar.next",
+ HP->getTerminator());
rememberInstruction(Add);
- PN->addIncoming(Add, HP);
+ CanonicalIV->addIncoming(Add, HP);
} else {
- PN->addIncoming(Constant::getNullValue(Ty), HP);
+ CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
}
}
}
+ // {0,+,1} --> Insert a canonical induction variable into the loop!
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
+ assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
+ "IVs with types different from the canonical IV should "
+ "already have been handled!");
+ return CanonicalIV;
+ }
+
// {0,+,F} --> {0,+,1} * F
- // Get the canonical induction variable I for this loop.
- Value *I = CanonicalIV ?
- CanonicalIV :
- getOrInsertCanonicalInductionVariable(L, Ty);
// If this is a simple linear addrec, emit it now as a special case.
if (S->isAffine()) // {0,+,F} --> i*F
return
expand(SE.getTruncateOrNoop(
- SE.getMulExpr(SE.getUnknown(I),
+ SE.getMulExpr(SE.getUnknown(CanonicalIV),
SE.getNoopOrAnyExtend(S->getOperand(1),
- I->getType())),
+ CanonicalIV->getType())),
Ty));
// If this is a chain of recurrences, turn it into a closed form, using the
// folders, then expandCodeFor the closed form. This allows the folders to
// simplify the expression without having to build a bunch of special code
// into this folder.
- const SCEV *IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV.
+ const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
// Promote S up to the canonical IV type, if the cast is foldable.
const SCEV *NewS = S;
- const SCEV *Ext = SE.getNoopOrAnyExtend(S, I->getType());
+ const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
if (isa<SCEVAddRecExpr>(Ext))
NewS = Ext;
@@ -1337,16 +1344,21 @@ void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
/// starts at zero and steps by one on each iteration.
-Value *
+PHINode *
SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
const Type *Ty) {
assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
+
+ // Build a SCEV for {0,+,1}<L>.
const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
SE.getConstant(Ty, 1), L);
+
+ // Emit code for it.
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
- Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
+ PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
if (SaveInsertBB)
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
return V;
}
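// A side note, not part of the patch: the canonical induction variable
// built above is the {0,+,1}<L> recurrence, a counter that is zero on loop
// entry and incremented once per back-edge. The PHI ("indvar") and add
// ("indvar.next") pair corresponds to this shape:
static long Canonical(long N) {
  long indvar = 0;             // the PHI: 0 coming from the preheader
  while (indvar < N)
    indvar = indvar + 1;       // "indvar.next" on the back-edge
  return indvar;
}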
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 563fd2fa96e2e..ac36cef89ebb5 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
/// post-inc value when we cannot) or it can end up adding extra live-ranges to
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
-static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
+static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand,
const Loop *L, DominatorTree *DT) {
// If the user is in the loop, use the preinc value.
if (L->contains(User)) return false;
@@ -45,20 +45,17 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
// their uses occur in the predecessor block, not the block the PHI lives in)
// should still use the post-inc value. Check for this case now.
PHINode *PN = dyn_cast<PHINode>(User);
- if (!PN) return false; // not a phi, not dominated by latch block.
+ if (!PN || !Operand) return false; // not a phi, not dominated by latch block.
- // Look at all of the uses of IV by the PHI node. If any use corresponds to
- // a block that is not dominated by the latch block, give up and use the
+ // Look at all of the uses of Operand by the PHI node. If any use corresponds
+ // to a block that is not dominated by the latch block, give up and use the
// preincremented value.
- unsigned NumUses = 0;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == IV) {
- ++NumUses;
- if (!DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
- return false;
- }
+ if (PN->getIncomingValue(i) == Operand &&
+ !DT->dominates(LatchBlock, PN->getIncomingBlock(i)))
+ return false;
- // Okay, all uses of IV by PN are in predecessor blocks that really are
+ // Okay, all uses of Operand by PN are in predecessor blocks that really are
// dominated by the latch block. Use the post-incremented value.
return true;
}
@@ -72,6 +69,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
DominatorTree &DT) {
if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
return S;
+
if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
const SCEV *O = X->getOperand();
const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
@@ -85,9 +83,69 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
}
return S;
}
+
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+ // An addrec. This is the interesting part.
+ SmallVector<const SCEV *, 8> Operands;
+ const Loop *L = AR->getLoop();
+ // The addrec conceptually uses its operands at loop entry.
+ Instruction *LUser = L->getHeader()->begin();
+ // Transform each operand.
+ for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+ I != E; ++I) {
+ const SCEV *O = *I;
+ const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
+ Operands.push_back(N);
+ }
+ const SCEV *Result = SE.getAddRecExpr(Operands, L);
+ switch (Kind) {
+ default: llvm_unreachable("Unexpected transform name!");
+ case NormalizeAutodetect:
+ if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ Loops.insert(L);
+ }
+#if 0
+ // This assert is conceptually correct, but ScalarEvolution currently
+ // sometimes fails to canonicalize two equal SCEVs to exactly the same
+ // form. It's possibly a pessimization when this happens, but it isn't a
+ // correctness problem, so disable this assert for now.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Normalize:
+ if (Loops.count(L)) {
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
+ }
+#if 0
+ // See the comment on the assert above.
+ assert(S == TransformForPostIncUse(Denormalize, Result,
+ User, OperandValToReplace,
+ Loops, SE, DT) &&
+ "SCEV normalization is not invertible!");
+#endif
+ break;
+ case Denormalize:
+ if (Loops.count(L))
+ Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
+ break;
+ }
+ return Result;
+ }
+
if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
SmallVector<const SCEV *, 8> Operands;
bool Changed = false;
+ // Transform each operand.
for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
I != E; ++I) {
const SCEV *O = *I;
@@ -96,37 +154,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
Changed |= N != O;
Operands.push_back(N);
}
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- // An addrec. This is the interesting part.
- const Loop *L = AR->getLoop();
- const SCEV *Result = SE.getAddRecExpr(Operands, L);
- switch (Kind) {
- default: llvm_unreachable("Unexpected transform name!");
- case NormalizeAutodetect:
- if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace))
- if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) {
- const SCEV *TransformedStep =
- TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
- User, OperandValToReplace, Loops, SE, DT);
- Result = SE.getMinusSCEV(Result, TransformedStep);
- Loops.insert(L);
- }
- break;
- case Normalize:
- if (Loops.count(L)) {
- const SCEV *TransformedStep =
- TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
- User, OperandValToReplace, Loops, SE, DT);
- Result = SE.getMinusSCEV(Result, TransformedStep);
- }
- break;
- case Denormalize:
- if (Loops.count(L))
- Result = SE.getAddExpr(Result, AR->getStepRecurrence(SE));
- break;
- }
- return Result;
- }
+ // If any operand actually changed, return a transformed result.
if (Changed)
switch (S->getSCEVType()) {
case scAddExpr: return SE.getAddExpr(Operands);
@@ -137,6 +165,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
}
return S;
}
+
if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
const SCEV *LO = X->getLHS();
const SCEV *RO = X->getRHS();
@@ -148,6 +177,7 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
return SE.getUDivExpr(LN, RN);
return S;
}
+
llvm_unreachable("Unexpected SCEV kind!");
return 0;
}
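// A side note, not part of the patch: for an affine addrec, normalizing a
// post-increment use subtracts the step from the start, so {A,+,B}<L>
// becomes {A-B,+,B}<L>, and denormalizing adds it back. The identity being
// relied on in the addrec case above is that the normalized recurrence at
// trip n+1 equals the original at trip n:
#include <cassert>
static void CheckPostIncNormalization() {
  const long A = 10, B = 3;
  for (long n = 0; n != 8; ++n)
    assert((A - B) + B * (n + 1) == A + B * n);
}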
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
new file mode 100644
index 0000000000000..bbfdcec3f9b4b
--- /dev/null
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -0,0 +1,191 @@
+//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBasedAliasAnalysis pass, which implements
+// metadata-based TBAA.
+//
+// In LLVM IR, memory does not have types, so LLVM's own type system is not
+// suitable for doing TBAA. Instead, metadata is added to the IR to describe
+// a type system of a higher level language.
+//
+// This pass is language-independent. The type system is encoded in
+// metadata. This allows this pass to support typical C and C++ TBAA, but
+// it can also support custom aliasing behavior for other languages.
+//
+// This is a work-in-progress. It doesn't work yet, and the metadata
+// format isn't stable.
+//
+// TODO: getModRefBehavior. The AliasAnalysis infrastructure will need to
+// be extended.
+// TODO: AA chaining
+// TODO: struct fields
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ /// TBAANode - This is a simple wrapper around an MDNode which provides a
+ /// higher-level interface by hiding the details of how alias analysis
+ /// information is encoded in its operands.
+ class TBAANode {
+ const MDNode *Node;
+
+ public:
+ TBAANode() : Node(0) {}
+ explicit TBAANode(MDNode *N) : Node(N) {}
+
+ /// getNode - Get the MDNode for this TBAANode.
+ const MDNode *getNode() const { return Node; }
+
+ /// getParent - Get this TBAANode's Alias DAG parent.
+ TBAANode getParent() const {
+ if (Node->getNumOperands() < 2)
+ return TBAANode();
+ MDNode *P = dyn_cast<MDNode>(Node->getOperand(1));
+ if (!P)
+ return TBAANode();
+ // Ok, this node has a valid parent. Return it.
+ return TBAANode(P);
+ }
+
+ /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+ /// which are not modified (by any means) in the context where this
+ /// AliasAnalysis is relevant.
+ bool TypeIsImmutable() const {
+ if (Node->getNumOperands() < 3)
+ return false;
+ ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
+ if (!CI)
+ return false;
+ // TODO: Think about the encoding.
+ return CI->isOne();
+ }
+ };
+}
+
+namespace {
+ /// TypeBasedAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses TBAA metadata to answer queries.
+ class TypeBasedAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ TypeBasedAliasAnalysis() : ImmutablePass(ID) {}
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ private:
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Value *V1, unsigned V1Size,
+ const Value *V2, unsigned V2Size);
+ virtual bool pointsToConstantMemory(const Value *P);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char TypeBasedAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa",
+ "Type-Based Alias Analysis", false, true, false);
+
+ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() {
+ return new TypeBasedAliasAnalysis();
+}
+
+void
+TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+AliasAnalysis::AliasResult
+TypeBasedAliasAnalysis::alias(const Value *A, unsigned ASize,
+ const Value *B, unsigned BSize) {
+ // Currently, metadata can only be attached to Instructions.
+ const Instruction *AI = dyn_cast<Instruction>(A);
+ if (!AI) return MayAlias;
+ const Instruction *BI = dyn_cast<Instruction>(B);
+ if (!BI) return MayAlias;
+
+ // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must
+ // be conservative.
+ MDNode *AM =
+ AI->getMetadata(AI->getParent()->getParent()->getParent()
+ ->getMDKindID("tbaa"));
+ if (!AM) return MayAlias;
+ MDNode *BM =
+ BI->getMetadata(BI->getParent()->getParent()->getParent()
+ ->getMDKindID("tbaa"));
+ if (!BM) return MayAlias;
+
+ // Keep track of the root node for A and B.
+ TBAANode RootA, RootB;
+
+ // Climb the DAG from A to see if we reach B.
+ for (TBAANode T(AM); ; ) {
+ if (T.getNode() == BM)
+ // B is an ancestor of A.
+ return MayAlias;
+
+ RootA = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Climb the DAG from B to see if we reach A.
+ for (TBAANode T(BM); ; ) {
+ if (T.getNode() == AM)
+ // A is an ancestor of B.
+ return MayAlias;
+
+ RootB = T;
+ T = T.getParent();
+ if (!T.getNode())
+ break;
+ }
+
+ // Neither node is an ancestor of the other.
+
+ // If they have the same root, then we've proved there's no alias.
+ if (RootA.getNode() == RootB.getNode())
+ return NoAlias;
+
+ // If they have different roots, they're part of different potentially
+ // unrelated type systems, so we must be conservative.
+ return MayAlias;
+}
+
+bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Value *P) {
+ // Currently, metadata can only be attached to Instructions.
+ const Instruction *I = dyn_cast<Instruction>(P);
+ if (!I) return false;
+
+ MDNode *M =
+ I->getMetadata(I->getParent()->getParent()->getParent()
+ ->getMDKindID("tbaa"));
+ if (!M) return false;
+
+ // If this is an "immutable" type, we can assume the pointer is pointing
+ // to constant memory.
+ return TBAANode(M).TypeIsImmutable();
+}
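// A side sketch, not part of the patch: the ancestor-and-root walk from
// TypeBasedAliasAnalysis::alias above, on a toy parent-pointer tree. The
// names here are illustrative, not LLVM API.
struct TypeNode {
  const TypeNode *Parent;       // null for a root
};
static bool TBAAMayAlias(const TypeNode *A, const TypeNode *B) {
  const TypeNode *RootA = A, *RootB = B;
  for (const TypeNode *T = A; T; T = T->Parent) {
    if (T == B) return true;    // B is an ancestor of A
    RootA = T;
  }
  for (const TypeNode *T = B; T; T = T->Parent) {
    if (T == A) return true;    // A is an ancestor of B
    RootB = T;
  }
  // Same root: the type DAG proves the accesses disjoint (NoAlias).
  // Different roots: unrelated type systems, so stay conservative.
  return RootA != RootB;
}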
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index b4c9884a20ed8..181c9b01980c3 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -880,19 +880,20 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
}
Value *Mul0 = NULL;
- Value *Mul1 = NULL;
- bool M0 = ComputeMultiple(Op0, Base, Mul0,
- LookThroughSExt, Depth+1);
- bool M1 = ComputeMultiple(Op1, Base, Mul1,
- LookThroughSExt, Depth+1);
-
- if (M0) {
- if (isa<Constant>(Op1) && isa<Constant>(Mul0)) {
- // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
- Multiple = ConstantExpr::getMul(cast<Constant>(Mul0),
- cast<Constant>(Op1));
- return true;
- }
+ if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+ if (Constant *Op1C = dyn_cast<Constant>(Op1))
+ if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+ if (Op1C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+ if (Op1C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+ // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+ Multiple = ConstantExpr::getMul(MulC, Op1C);
+ return true;
+ }
if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
if (Mul0CI->getValue() == 1) {
@@ -902,13 +903,21 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
}
}
- if (M1) {
- if (isa<Constant>(Op0) && isa<Constant>(Mul1)) {
- // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
- Multiple = ConstantExpr::getMul(cast<Constant>(Mul1),
- cast<Constant>(Op0));
- return true;
- }
+ Value *Mul1 = NULL;
+ if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+ if (Constant *Op0C = dyn_cast<Constant>(Op0))
+ if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+ if (Op0C->getType()->getPrimitiveSizeInBits() <
+ MulC->getType()->getPrimitiveSizeInBits())
+ Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+ if (Op0C->getType()->getPrimitiveSizeInBits() >
+ MulC->getType()->getPrimitiveSizeInBits())
+ MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+ // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+ Multiple = ConstantExpr::getMul(MulC, Op0C);
+ return true;
+ }
if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
if (Mul1CI->getValue() == 1) {
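// A side note, not part of the patch: ConstantExpr::getMul requires both
// operands to share one type, and once the recursion looks through sext
// the returned multiple can be narrower than the other factor -- hence the
// width-matching zexts added above. The same guard on plain widths, purely
// as an illustration:
struct TypedConst {
  unsigned long long Val;
  unsigned Bits;                // stands in for the constant's integer type
};
static TypedConst MulMatchingWidths(TypedConst A, TypedConst B) {
  if (A.Bits < B.Bits)          // zero-extend the narrower constant so
    A.Bits = B.Bits;            // both sides share one width; the stored
  else if (B.Bits < A.Bits)     // value is unchanged by a zext
    B.Bits = A.Bits;
  TypedConst R = { A.Val * B.Val, A.Bits };
  return R;
}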
@@ -973,195 +982,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return false;
}
-
-/// GetLinearExpression - Analyze the specified value as a linear expression:
-/// "A*V + B", where A and B are constant integers. Return the scale and offset
-/// values as APInts and return V as a Value*. The incoming Value is known to
-/// have IntegerType. Note that this looks through extends, so the high bits
-/// may not be represented in the result.
-static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
- const TargetData *TD, unsigned Depth) {
- assert(V->getType()->isIntegerTy() && "Not an integer value");
-
- // Limit our recursion depth.
- if (Depth == 6) {
- Scale = 1;
- Offset = 0;
- return V;
- }
-
- if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
- switch (BOp->getOpcode()) {
- default: break;
- case Instruction::Or:
- // X|C == X+C if all the bits in C are unset in X. Otherwise we can't
- // analyze it.
- if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), TD))
- break;
- // FALL THROUGH.
- case Instruction::Add:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
- Offset += RHSC->getValue();
- return V;
- case Instruction::Mul:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
- Offset *= RHSC->getValue();
- Scale *= RHSC->getValue();
- return V;
- case Instruction::Shl:
- V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1);
- Offset <<= RHSC->getValue().getLimitedValue();
- Scale <<= RHSC->getValue().getLimitedValue();
- return V;
- }
- }
- }
-
- // Since clients don't care about the high bits of the value, just scales and
- // offsets, we can look through extensions.
- if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {
- Value *CastOp = cast<CastInst>(V)->getOperand(0);
- unsigned OldWidth = Scale.getBitWidth();
- unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits();
- Scale.trunc(SmallWidth);
- Offset.trunc(SmallWidth);
- Value *Result = GetLinearExpression(CastOp, Scale, Offset, TD, Depth+1);
- Scale.zext(OldWidth);
- Offset.zext(OldWidth);
- return Result;
- }
-
- Scale = 1;
- Offset = 0;
- return V;
-}
-
-/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it
-/// into a base pointer with a constant offset and a number of scaled symbolic
-/// offsets.
-///
-/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in
-/// the VarIndices vector) are Value*'s that are known to be scaled by the
-/// specified amount, but which may have other unrepresented high bits. As such,
-/// the gep cannot necessarily be reconstructed from its decomposed form.
-///
-/// When TargetData is around, this function is capable of analyzing everything
-/// that Value::getUnderlyingObject() can look through. When not, it just looks
-/// through pointer casts.
-///
-const Value *llvm::DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
- SmallVectorImpl<std::pair<const Value*, int64_t> > &VarIndices,
- const TargetData *TD) {
- // Limit recursion depth to limit compile time in crazy cases.
- unsigned MaxLookup = 6;
-
- BaseOffs = 0;
- do {
- // See if this is a bitcast or GEP.
- const Operator *Op = dyn_cast<Operator>(V);
- if (Op == 0) {
- // The only non-operator case we can handle are GlobalAliases.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (!GA->mayBeOverridden()) {
- V = GA->getAliasee();
- continue;
- }
- }
- return V;
- }
-
- if (Op->getOpcode() == Instruction::BitCast) {
- V = Op->getOperand(0);
- continue;
- }
-
- const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
- if (GEPOp == 0)
- return V;
-
- // Don't attempt to analyze GEPs over unsized objects.
- if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
- ->getElementType()->isSized())
- return V;
-
- // If we are lacking TargetData information, we can't compute the offets of
- // elements computed by GEPs. However, we can handle bitcast equivalent
- // GEPs.
- if (!TD) {
- if (!GEPOp->hasAllZeroIndices())
- return V;
- V = GEPOp->getOperand(0);
- continue;
- }
-
- // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
- gep_type_iterator GTI = gep_type_begin(GEPOp);
- for (User::const_op_iterator I = GEPOp->op_begin()+1,
- E = GEPOp->op_end(); I != E; ++I) {
- Value *Index = *I;
- // Compute the (potentially symbolic) offset in bytes for this index.
- if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
- // For a struct, add the member offset.
- unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- if (FieldNo == 0) continue;
-
- BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
- continue;
- }
-
- // For an array/pointer, add the element offset, explicitly scaled.
- if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
- if (CIdx->isZero()) continue;
- BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
- continue;
- }
-
- uint64_t Scale = TD->getTypeAllocSize(*GTI);
-
- // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
- unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
- APInt IndexScale(Width, 0), IndexOffset(Width, 0);
- Index = GetLinearExpression(Index, IndexScale, IndexOffset, TD, 0);
-
- // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
- // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
- BaseOffs += IndexOffset.getZExtValue()*Scale;
- Scale *= IndexScale.getZExtValue();
-
-
- // If we already had an occurrence of this index variable, merge this
- // scale into it. For example, we want to handle:
- // A[x][x] -> x*16 + x*4 -> x*20
- // This also ensures that 'x' only appears in the index list once.
- for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) {
- if (VarIndices[i].first == Index) {
- Scale += VarIndices[i].second;
- VarIndices.erase(VarIndices.begin()+i);
- break;
- }
- }
-
- // Make sure that we have a scale that makes sense for this target's
- // pointer size.
- if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
- Scale <<= ShiftBits;
- Scale >>= ShiftBits;
- }
-
- if (Scale)
- VarIndices.push_back(std::make_pair(Index, Scale));
- }
-
- // Analyze the base pointer next.
- V = GEPOp->getOperand(0);
- } while (--MaxLookup);
-
- // If the chain of expressions is too deep, just return early.
- return V;
-}
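
A worked sketch of the duplicate-index merge above, assuming A has type [N x [4 x i32]] so the two GEP levels contribute byte strides of 16 and 4 (plain C++, with a string standing in for the index Value*):

#include <cstdint>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<const char*, int64_t> > VarIndices;
  const char *Index = "x";             // both GEP indices are the same variable
  const int64_t Strides[] = { 16, 4 };
  for (unsigned s = 0; s != 2; ++s) {
    int64_t Scale = Strides[s];
    for (unsigned i = 0, e = VarIndices.size(); i != e; ++i)
      if (VarIndices[i].first == Index) {  // already seen: fold the scales
        Scale += VarIndices[i].second;
        VarIndices.erase(VarIndices.begin() + i);
        break;
      }
    if (Scale)
      VarIndices.push_back(std::make_pair(Index, Scale));
  }
  // VarIndices now holds the single pair ("x", 20): x*16 + x*4 == x*20.
  return 0;
}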
-
-
// This is the recursive version of BuildSubAggregate. It takes a few different
// arguments. Idxs is the index within the nested struct From that we are
// looking at now (which is of type IndexedType). IdxSkip is the number of
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index f4c0e50fd94df..032753a3b2c61 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -493,6 +493,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(private);
KEYWORD(linker_private);
KEYWORD(linker_private_weak);
+ KEYWORD(linker_private_weak_def_auto);
KEYWORD(internal);
KEYWORD(available_externally);
KEYWORD(linkonce);
@@ -572,7 +573,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(type);
KEYWORD(opaque);
- KEYWORD(union);
KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 221b994db55fa..f21a065473b62 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -199,6 +199,7 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::kw_private: // OptionalLinkage
case lltok::kw_linker_private: // OptionalLinkage
case lltok::kw_linker_private_weak: // OptionalLinkage
+ case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage
case lltok::kw_internal: // OptionalLinkage
case lltok::kw_weak: // OptionalLinkage
case lltok::kw_weak_odr: // OptionalLinkage
@@ -517,11 +518,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) {
if (Result) return false;
// Otherwise, create MDNode forward reference.
-
- // FIXME: This is not unique enough!
- std::string FwdRefName = "llvm.mdnode.fwdref." + utostr(MID);
- Value *V = MDString::get(Context, FwdRefName);
- MDNode *FwdNode = MDNode::get(Context, &V, 1);
+ MDNode *FwdNode = MDNode::getTemporary(Context, 0, 0);
ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
if (NumberedMetadata.size() <= MID)
@@ -543,27 +540,20 @@ bool LLParser::ParseNamedMetadata() {
ParseToken(lltok::lbrace, "Expected '{' here"))
return true;
- SmallVector<MDNode *, 8> Elts;
+ NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name);
if (Lex.getKind() != lltok::rbrace)
do {
- // Null is a special case since it is typeless.
- if (EatIfPresent(lltok::kw_null)) {
- Elts.push_back(0);
- continue;
- }
-
if (ParseToken(lltok::exclaim, "Expected '!' here"))
return true;
MDNode *N = 0;
if (ParseMDNodeID(N)) return true;
- Elts.push_back(N);
+ NMD->addOperand(N);
} while (EatIfPresent(lltok::comma));
if (ParseToken(lltok::rbrace, "expected end of metadata node"))
return true;
- NamedMDNode::Create(Context, Name, Elts.data(), Elts.size(), M);
return false;
}
@@ -592,7 +582,9 @@ bool LLParser::ParseStandaloneMetadata() {
std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator
FI = ForwardRefMDNodes.find(MetadataID);
if (FI != ForwardRefMDNodes.end()) {
- FI->second.first->replaceAllUsesWith(Init);
+ MDNode *Temp = FI->second.first;
+ Temp->replaceAllUsesWith(Init);
+ MDNode::deleteTemporary(Temp);
ForwardRefMDNodes.erase(FI);
assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
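
The temporary-node pattern used here, sketched with the same metadata calls this patch itself introduces (getTemporary creates a non-uniqued placeholder; deleteTemporary must free it explicitly):

  MDNode *Temp = MDNode::getTemporary(Context, 0, 0); // forward reference
  // ... later, once !N is defined and resolved to Init ...
  Temp->replaceAllUsesWith(Init); // patch every use-list user of Temp
  MDNode::deleteTemporary(Temp);  // temporaries are not context-owned

Note this only works for uses on normal value use lists; instruction attachments such as !dbg are stored out-of-line, which is why ParseInstructionMetadata below still defers resolution through MDRef records.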
@@ -632,7 +624,8 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
Linkage != GlobalValue::InternalLinkage &&
Linkage != GlobalValue::PrivateLinkage &&
Linkage != GlobalValue::LinkerPrivateLinkage &&
- Linkage != GlobalValue::LinkerPrivateWeakLinkage)
+ Linkage != GlobalValue::LinkerPrivateWeakLinkage &&
+ Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage)
return Error(LinkageLoc, "invalid linkage type for alias");
Constant *Aliasee;
@@ -1017,6 +1010,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
/// ::= 'private'
/// ::= 'linker_private'
/// ::= 'linker_private_weak'
+/// ::= 'linker_private_weak_def_auto'
/// ::= 'internal'
/// ::= 'weak'
/// ::= 'weak_odr'
@@ -1038,6 +1032,9 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
case lltok::kw_linker_private_weak:
Res = GlobalValue::LinkerPrivateWeakLinkage;
break;
+ case lltok::kw_linker_private_weak_def_auto:
+ Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage;
+ break;
case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
@@ -1120,29 +1117,44 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
/// ParseInstructionMetadata
/// ::= !dbg !42 (',' !dbg !57)*
-bool LLParser::ParseInstructionMetadata(Instruction *Inst) {
+bool LLParser::ParseInstructionMetadata(Instruction *Inst,
+ PerFunctionState *PFS) {
do {
if (Lex.getKind() != lltok::MetadataVar)
return TokError("expected metadata after comma");
std::string Name = Lex.getStrVal();
+ unsigned MDK = M->getMDKindID(Name.c_str());
Lex.Lex();
MDNode *Node;
unsigned NodeID;
SMLoc Loc = Lex.getLoc();
- if (ParseToken(lltok::exclaim, "expected '!' here") ||
- ParseMDNodeID(Node, NodeID))
+
+ if (ParseToken(lltok::exclaim, "expected '!' here"))
return true;
- unsigned MDK = M->getMDKindID(Name.c_str());
- if (Node) {
- // If we got the node, add it to the instruction.
- Inst->setMetadata(MDK, Node);
+ // This code is similar to that of ParseMetadataValue; however, it needs to
+ // have special-case code for a forward reference; see the comments on
+ // ForwardRefInstMetadata for details. Also, MDStrings are not supported
+ // at the top level here.
+ if (Lex.getKind() == lltok::lbrace) {
+ ValID ID;
+ if (ParseMetadataListValue(ID, PFS))
+ return true;
+ assert(ID.Kind == ValID::t_MDNode);
+ Inst->setMetadata(MDK, ID.MDNodeVal);
} else {
- MDRef R = { Loc, MDK, NodeID };
- // Otherwise, remember that this should be resolved later.
- ForwardRefInstMetadata[Inst].push_back(R);
+ if (ParseMDNodeID(Node, NodeID))
+ return true;
+ if (Node) {
+ // If we got the node, add it to the instruction.
+ Inst->setMetadata(MDK, Node);
+ } else {
+ MDRef R = { Loc, MDK, NodeID };
+ // Otherwise, remember that this should be resolved later.
+ ForwardRefInstMetadata[Inst].push_back(R);
+ }
}
// If this is the end of the list, we're done.
@@ -1161,6 +1173,8 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
if (ParseUInt32(Alignment)) return true;
if (!isPowerOf2_32(Alignment))
return Error(AlignLoc, "alignment is not a power of two");
+ if (Alignment > Value::MaximumAlignment)
+ return Error(AlignLoc, "huge alignments are not supported yet");
return false;
}
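
For reference, isPowerOf2_32 amounts to the usual single-bit test; a sketch, not the LLVM implementation:

#include <cstdint>

static inline bool isPow2(uint32_t N) {
  return N != 0 && (N & (N - 1)) == 0; // exactly one bit set
}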
@@ -1183,6 +1197,7 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
if (Lex.getKind() != lltok::kw_align)
return Error(Lex.getLoc(), "expected metadata or 'align'");
+ LocTy AlignLoc = Lex.getLoc();
if (ParseOptionalAlignment(Alignment)) return true;
}
@@ -1344,11 +1359,6 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
if (ParseStructType(Result, false))
return true;
break;
- case lltok::kw_union:
- // TypeRec ::= 'union' '{' ... '}'
- if (ParseUnionType(Result))
- return true;
- break;
case lltok::lsquare:
// TypeRec ::= '[' ... ']'
Lex.Lex(); // eat the lsquare.
@@ -1658,38 +1668,6 @@ bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
return false;
}
-/// ParseUnionType
-/// TypeRec
-/// ::= 'union' '{' TypeRec (',' TypeRec)* '}'
-bool LLParser::ParseUnionType(PATypeHolder &Result) {
- assert(Lex.getKind() == lltok::kw_union);
- Lex.Lex(); // Consume the 'union'
-
- if (ParseToken(lltok::lbrace, "'{' expected after 'union'")) return true;
-
- SmallVector<PATypeHolder, 8> ParamsList;
- do {
- LocTy EltTyLoc = Lex.getLoc();
- if (ParseTypeRec(Result)) return true;
- ParamsList.push_back(Result);
-
- if (Result->isVoidTy())
- return Error(EltTyLoc, "union element can not have void type");
- if (!UnionType::isValidElementType(Result))
- return Error(EltTyLoc, "invalid element type for union");
-
- } while (EatIfPresent(lltok::comma)) ;
-
- if (ParseToken(lltok::rbrace, "expected '}' at end of union"))
- return true;
-
- SmallVector<const Type*, 8> ParamsListTy;
- for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
- ParamsListTy.push_back(ParamsList[i].get());
- Result = HandleUpRefs(UnionType::get(&ParamsListTy[0], ParamsListTy.size()));
- return false;
-}
-
/// ParseArrayVectorType - Parse an array or vector type, assuming the first
/// token has already been consumed.
/// TypeRec
@@ -2504,6 +2482,20 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
return false;
}
+bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) {
+ assert(Lex.getKind() == lltok::lbrace);
+ Lex.Lex();
+
+ SmallVector<Value*, 16> Elts;
+ if (ParseMDNodeVector(Elts, PFS) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
+ ID.Kind = ValID::t_MDNode;
+ return false;
+}
+
/// ParseMetadataValue
/// ::= !42
/// ::= !{...}
@@ -2514,16 +2506,8 @@ bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
// MDNode:
// !{ ... }
- if (EatIfPresent(lltok::lbrace)) {
- SmallVector<Value*, 16> Elts;
- if (ParseMDNodeVector(Elts, PFS) ||
- ParseToken(lltok::rbrace, "expected end of metadata node"))
- return true;
-
- ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
- ID.Kind = ValID::t_MDNode;
- return false;
- }
+ if (Lex.getKind() == lltok::lbrace)
+ return ParseMetadataListValue(ID, PFS);
// Standalone metadata reference
// !42
@@ -2635,16 +2619,8 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
V = Constant::getNullValue(Ty);
return false;
case ValID::t_Constant:
- if (ID.ConstantVal->getType() != Ty) {
- // Allow a constant struct with a single member to be converted
- // to a union, if the union has a member which is the same type
- // as the struct member.
- if (const UnionType* utype = dyn_cast<UnionType>(Ty)) {
- return ParseUnionValue(utype, ID, V);
- }
-
+ if (ID.ConstantVal->getType() != Ty)
return Error(ID.Loc, "constant expression type mismatch");
- }
V = ID.ConstantVal;
return false;
@@ -2675,22 +2651,6 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
return false;
}
-bool LLParser::ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V) {
- if (const StructType* stype = dyn_cast<StructType>(ID.ConstantVal->getType())) {
- if (stype->getNumContainedTypes() != 1)
- return Error(ID.Loc, "constant expression type mismatch");
- int index = utype->getElementTypeIndex(stype->getContainedType(0));
- if (index < 0)
- return Error(ID.Loc, "initializer type is not a member of the union");
-
- V = ConstantUnion::get(
- utype, cast<Constant>(ID.ConstantVal->getOperand(0)));
- return false;
- }
-
- return Error(ID.Loc, "constant expression type mismatch");
-}
-
/// FunctionHeader
/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
@@ -2724,6 +2684,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
case GlobalValue::PrivateLinkage:
case GlobalValue::LinkerPrivateLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::LinkOnceAnyLinkage:
@@ -2980,7 +2941,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// With a normal result, we check to see if the instruction is followed by
// a comma and metadata.
if (EatIfPresent(lltok::comma))
- if (ParseInstructionMetadata(Inst))
+ if (ParseInstructionMetadata(Inst, &PFS))
return true;
break;
case InstExtraComma:
@@ -2988,7 +2949,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// If the instruction parser ate an extra comma at the end of it, it
// *must* be followed by metadata.
- if (ParseInstructionMetadata(Inst))
+ if (ParseInstructionMetadata(Inst, &PFS))
return true;
break;
}
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index f765a2ae4e6ce..404cec3ed7c74 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -32,7 +32,6 @@ namespace llvm {
class GlobalValue;
class MDString;
class MDNode;
- class UnionType;
/// ValID - Represents a reference of a definition of some sort with no type.
/// There are several cases where we have to parse the value but where the
@@ -80,6 +79,14 @@ namespace llvm {
// Instruction metadata resolution. Each instruction can have a list of
// MDRef info associated with it.
+ //
+ // The simpler approach of just creating temporary MDNodes and then calling
+ // RAUW on them when the definition is processed doesn't work because some
+ // instruction metadata kinds, such as dbg, get stored in the IR in an
+ // "optimized" format which doesn't participate in the normal value use
+ // lists. This means that RAUW doesn't work, even on temporary MDNodes
+ // which otherwise support RAUW. Instead, we defer resolving MDNode
+ // references until the definitions have been processed.
struct MDRef {
SMLoc Loc;
unsigned MDKind, MDSlot;
@@ -180,7 +187,6 @@ namespace llvm {
bool ParseOptionalCallingConv(CallingConv::ID &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
bool ParseOptionalStackAlignment(unsigned &Alignment);
- bool ParseInstructionMetadata(Instruction *Inst);
bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
@@ -222,7 +228,6 @@ namespace llvm {
}
bool ParseTypeRec(PATypeHolder &H);
bool ParseStructType(PATypeHolder &H, bool Packed);
- bool ParseUnionType(PATypeHolder &H);
bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
bool ParseFunctionType(PATypeHolder &Result);
PATypeHolder HandleUpRefs(const Type *Ty);
@@ -291,7 +296,6 @@ namespace llvm {
return ParseTypeAndBasicBlock(BB, Loc, PFS);
}
- bool ParseUnionValue(const UnionType* utype, ValID &ID, Value *&V);
struct ParamInfo {
LocTy Loc;
@@ -308,8 +312,10 @@ namespace llvm {
bool ParseGlobalValue(const Type *Ty, Constant *&V);
bool ParseGlobalTypeAndValue(Constant *&V);
bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
+ bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
// Function Parsing.
struct ArgInfo {
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 2703134ec1a96..61f93a4274981 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -37,7 +37,8 @@ namespace lltok {
kw_declare, kw_define,
kw_global, kw_constant,
- kw_private, kw_linker_private, kw_linker_private_weak, kw_internal,
+ kw_private, kw_linker_private, kw_linker_private_weak,
+ kw_linker_private_weak_def_auto, kw_internal,
kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending,
kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
kw_default, kw_hidden, kw_protected,
@@ -97,7 +98,6 @@ namespace lltok {
kw_type,
kw_opaque,
- kw_union,
kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule,
kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno,
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index e511cbe29c756..e7cef9b5c3c58 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -45,8 +45,7 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrorStr);
if (F == 0) {
Err = SMDiagnostic(Filename,
- "Could not open input file '" + Filename + "': " +
- ErrorStr);
+ "Could not open input file: " + ErrorStr);
return 0;
}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index b3f0776d29d54..830c79aa3b54e 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -39,6 +39,7 @@ void BitcodeReader::FreeState() {
std::vector<BasicBlock*>().swap(FunctionBBs);
std::vector<Function*>().swap(FunctionsWithBodies);
DeferredFunctionInfo.clear();
+ MDKindMap.clear();
}
//===----------------------------------------------------------------------===//
@@ -76,6 +77,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
case 12: return GlobalValue::AvailableExternallyLinkage;
case 13: return GlobalValue::LinkerPrivateLinkage;
case 14: return GlobalValue::LinkerPrivateWeakLinkage;
+ case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
}
}
@@ -295,8 +297,6 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
} else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
UserCS->getType()->isPacked());
- } else if (ConstantUnion *UserCU = dyn_cast<ConstantUnion>(UserC)) {
- NewC = ConstantUnion::get(UserCU->getType(), NewOps[0]);
} else if (isa<ConstantVector>(UserC)) {
NewC = ConstantVector::get(&NewOps[0], NewOps.size());
} else {
@@ -332,9 +332,9 @@ void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) {
}
// If there was a forward reference to this value, replace it.
- Value *PrevVal = OldV;
+ MDNode *PrevVal = cast<MDNode>(OldV);
OldV->replaceAllUsesWith(V);
- delete PrevVal;
+ MDNode::deleteTemporary(PrevVal);
// Deleting PrevVal sets the slot for Idx in MDValuePtrs to null. Store the
// new value for Idx.
MDValuePtrs[Idx] = V;
@@ -350,7 +350,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
}
// Create and return a placeholder, which will later be RAUW'd.
- Value *V = new Argument(Type::getMetadataTy(Context));
+ Value *V = MDNode::getTemporary(Context, 0, 0);
MDValuePtrs[Idx] = V;
return V;
}
@@ -589,13 +589,6 @@ bool BitcodeReader::ParseTypeTable() {
ResultTy = StructType::get(Context, EltTys, Record[0]);
break;
}
- case bitc::TYPE_CODE_UNION: { // UNION: [eltty x N]
- SmallVector<const Type*, 8> EltTys;
- for (unsigned i = 0, e = Record.size(); i != e; ++i)
- EltTys.push_back(getTypeByID(Record[i], true));
- ResultTy = UnionType::get(&EltTys[0], EltTys.size());
- break;
- }
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid ARRAY type record");
@@ -781,7 +774,8 @@ bool BitcodeReader::ParseMetadata() {
bool IsFunctionLocal = false;
// Read a record.
Record.clear();
- switch (Stream.ReadRecord(Code, Record)) {
+ Code = Stream.ReadRecord(Code, Record);
+ switch (Code) {
default: // Default behavior: ignore.
break;
case bitc::METADATA_NAME: {
@@ -794,34 +788,46 @@ bool BitcodeReader::ParseMetadata() {
Record.clear();
Code = Stream.ReadCode();
- // METADATA_NAME is always followed by METADATA_NAMED_NODE.
- if (Stream.ReadRecord(Code, Record) != bitc::METADATA_NAMED_NODE)
+ // METADATA_NAME is always followed by METADATA_NAMED_NODE2.
+ // Or METADATA_NAMED_NODE in LLVM 2.7. FIXME: Remove this in LLVM 3.0.
+ unsigned NextBitCode = Stream.ReadRecord(Code, Record);
+ if (NextBitCode == bitc::METADATA_NAMED_NODE) {
+ LLVM2_7MetadataDetected = true;
+ } else if (NextBitCode != bitc::METADATA_NAMED_NODE2)
assert(0 && "Invalid Named Metadata record");
// Read named metadata elements.
unsigned Size = Record.size();
- SmallVector<MDNode *, 8> Elts;
+ NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name);
for (unsigned i = 0; i != Size; ++i) {
- if (Record[i] == ~0U) {
- Elts.push_back(NULL);
- continue;
- }
MDNode *MD = dyn_cast<MDNode>(MDValueList.getValueFwdRef(Record[i]));
if (MD == 0)
return Error("Malformed metadata record");
- Elts.push_back(MD);
+ NMD->addOperand(MD);
}
- Value *V = NamedMDNode::Create(Context, Name.str(), Elts.data(),
- Elts.size(), TheModule);
- MDValueList.AssignValue(V, NextMDValueNo++);
+ // Backwards compatibility hack: NamedMDValues used to be Values,
+ // and they got their own slots in the value numbering. They are no
+ // longer Values; however, we still need to account for them in the
+ // numbering in order to be able to read old bitcode files.
+ // FIXME: Remove this in LLVM 3.0.
+ if (LLVM2_7MetadataDetected)
+ MDValueList.AssignValue(0, NextMDValueNo++);
break;
}
- case bitc::METADATA_FN_NODE:
+ case bitc::METADATA_FN_NODE: // FIXME: Remove in LLVM 3.0.
+ case bitc::METADATA_FN_NODE2:
IsFunctionLocal = true;
// fall-through
- case bitc::METADATA_NODE: {
+ case bitc::METADATA_NODE: // FIXME: Remove in LLVM 3.0.
+ case bitc::METADATA_NODE2: {
+
+ // Detect 2.7-era metadata.
+ // FIXME: Remove in LLVM 3.0.
+ if (Code == bitc::METADATA_FN_NODE || Code == bitc::METADATA_NODE)
+ LLVM2_7MetadataDetected = true;
+
if (Record.size() % 2 == 1)
- return Error("Invalid METADATA_NODE record");
+ return Error("Invalid METADATA_NODE2 record");
unsigned Size = Record.size();
SmallVector<Value*, 8> Elts;
@@ -859,13 +865,12 @@ bool BitcodeReader::ParseMetadata() {
SmallString<8> Name;
Name.resize(RecordLength-1);
unsigned Kind = Record[0];
- (void) Kind;
for (unsigned i = 1; i != RecordLength; ++i)
Name[i-1] = Record[i];
unsigned NewKind = TheModule->getMDKindID(Name.str());
- assert(Kind == NewKind &&
- "FIXME: Unable to handle custom metadata mismatch!");(void)NewKind;
+ if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
+ return Error("Conflicting METADATA_KIND records");
break;
}
}
@@ -1020,11 +1025,6 @@ bool BitcodeReader::ParseConstants() {
Elts.push_back(ValueList.getConstantFwdRef(Record[i],
STy->getElementType(i)));
V = ConstantStruct::get(STy, Elts);
- } else if (const UnionType *UnTy = dyn_cast<UnionType>(CurTy)) {
- uint64_t Index = Record[0];
- Constant *Val = ValueList.getConstantFwdRef(Record[1],
- UnTy->getElementType(Index));
- V = ConstantUnion::get(UnTy, Val);
} else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
const Type *EltTy = ATy->getElementType();
for (unsigned i = 0; i != Size; ++i)
@@ -1297,6 +1297,12 @@ bool BitcodeReader::ParseModule() {
UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn));
}
+ // Look for global variables which need to be renamed.
+ for (Module::global_iterator
+ GI = TheModule->global_begin(), GE = TheModule->global_end();
+ GI != GE; ++GI)
+ UpgradeGlobalVariable(GI);
+
// Force deallocation of memory for these vectors to favor clients that
// want lazy deserialization.
std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
@@ -1614,15 +1620,22 @@ bool BitcodeReader::ParseMetadataAttachment() {
switch (Stream.ReadRecord(Code, Record)) {
default: // Default behavior: ignore.
break;
- case bitc::METADATA_ATTACHMENT: {
+ // FIXME: Remove in LLVM 3.0.
+ case bitc::METADATA_ATTACHMENT:
+ LLVM2_7MetadataDetected = true;
+ case bitc::METADATA_ATTACHMENT2: {
unsigned RecordLength = Record.size();
if (Record.empty() || (RecordLength - 1) % 2 == 1)
return Error ("Invalid METADATA_ATTACHMENT reader!");
Instruction *Inst = InstructionList[Record[0]];
for (unsigned i = 1; i != RecordLength; i = i+2) {
unsigned Kind = Record[i];
+ DenseMap<unsigned, unsigned>::iterator I =
+ MDKindMap.find(Kind);
+ if (I == MDKindMap.end())
+ return Error("Invalid metadata kind ID");
Value *Node = MDValueList.getValueFwdRef(Record[i+1]);
- Inst->setMetadata(Kind, cast<MDNode>(Node));
+ Inst->setMetadata(I->second, cast<MDNode>(Node));
}
break;
}
@@ -1638,6 +1651,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.clear();
unsigned ModuleValueListSize = ValueList.size();
+ unsigned ModuleMDValueListSize = MDValueList.size();
// Add all the function arguments to the value table.
for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
@@ -1722,7 +1736,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = 0;
continue;
- case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
+ // FIXME: Remove this in LLVM 3.0.
+ case bitc::FUNC_CODE_DEBUG_LOC:
+ LLVM2_7MetadataDetected = true;
+ case bitc::FUNC_CODE_DEBUG_LOC2: { // DEBUG_LOC: [line, col, scope, ia]
I = 0; // Get the last instruction emitted.
if (CurBB && !CurBB->empty())
I = &CurBB->back();
@@ -1988,6 +2005,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
} while(OpNum != Record.size());
const Type *ReturnType = F->getReturnType();
+ // Handle multiple return values. FIXME: Remove in LLVM 3.0.
if (Vs.size() > 1 ||
(ReturnType->isStructTy() &&
(Vs.empty() || Vs[0]->getType() != ReturnType))) {
@@ -2183,7 +2201,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
// For backward compatibility, tolerate a lack of an opty, and use i32.
- // LLVM 3.0: Remove this.
+ // Remove this in LLVM 3.0.
if (Record.size() < 3 || Record.size() > 4)
return Error("Invalid ALLOCA record");
unsigned OpNum = 0;
@@ -2236,7 +2254,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_CALL: {
+ // FIXME: Remove this in LLVM 3.0.
+ case bitc::FUNC_CODE_INST_CALL:
+ LLVM2_7MetadataDetected = true;
+ case bitc::FUNC_CODE_INST_CALL2: {
// CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
return Error("Invalid CALL record");
@@ -2324,7 +2345,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (A->getParent() == 0) {
// We found at least one unresolved value. Nuke them all to avoid leaks.
for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){
- if ((A = dyn_cast<Argument>(ValueList.back())) && A->getParent() == 0) {
+ if ((A = dyn_cast<Argument>(ValueList[i])) && A->getParent() == 0) {
A->replaceAllUsesWith(UndefValue::get(A->getType()));
delete A;
}
@@ -2333,6 +2354,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
}
+ // FIXME: Check for unresolved forward-declared metadata references
+ // and clean up leaks.
+
// See if anything took the address of blocks in this function. If so,
// resolve them now.
DenseMap<Function*, std::vector<BlockAddrRefTy> >::iterator BAFRI =
@@ -2352,8 +2376,21 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
BlockAddrFwdRefs.erase(BAFRI);
}
+ // FIXME: Remove this in LLVM 3.0.
+ unsigned NewMDValueListSize = MDValueList.size();
+
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
+ MDValueList.shrinkTo(ModuleMDValueListSize);
+
+ // Backwards compatibility hack: Function-local metadata numbers
+ // were previously not reset between functions. This is now fixed;
+ // however, we still need to understand the old numbering in order
+ // to be able to read old bitcode files.
+ // FIXME: Remove this in LLVM 3.0.
+ if (LLVM2_7MetadataDetected)
+ MDValueList.resize(NewMDValueListSize);
+
std::vector<BasicBlock*>().swap(FunctionBBs);
return false;
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 55c71f7c886fe..053121bdad6e5 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -156,6 +156,9 @@ class BitcodeReader : public GVMaterializer {
// stored here with their replacement function.
typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
UpgradedIntrinsicMap UpgradedIntrinsics;
+
+ // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
+ DenseMap<unsigned, unsigned> MDKindMap;
// After the module header has been read, the FunctionsWithBodies list is
// reversed. This keeps track of whether we've done this yet.
@@ -170,11 +173,18 @@ class BitcodeReader : public GVMaterializer {
/// are resolved lazily when functions are loaded.
typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+
+ /// LLVM2_7MetadataDetected - True if metadata produced by LLVM 2.7 or
+ /// earlier was detected, in which case we behave slightly differently,
+ /// for compatibility.
+ /// FIXME: Remove in LLVM 3.0.
+ bool LLVM2_7MetadataDetected;
public:
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
- ErrorString(0), ValueList(C), MDValueList(C) {
+ ErrorString(0), ValueList(C), MDValueList(C),
+ LLVM2_7MetadataDetected(false) {
HasReversedFunctionsWithBodies = false;
}
~BitcodeReader() {
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index fa1b2c4bee2b2..7b6fc6cd928df 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -181,14 +181,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Log2_32_Ceil(VE.getTypes().size()+1)));
unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
- // Abbrev for TYPE_CODE_UNION.
- Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_UNION));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
- unsigned UnionAbbrev = Stream.EmitAbbrev(Abbv);
-
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
@@ -258,17 +250,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
AbbrevToUse = StructAbbrev;
break;
}
- case Type::UnionTyID: {
- const UnionType *UT = cast<UnionType>(T);
- // UNION: [eltty x N]
- Code = bitc::TYPE_CODE_UNION;
- // Output all of the element types.
- for (UnionType::element_iterator I = UT->element_begin(),
- E = UT->element_end(); I != E; ++I)
- TypeVals.push_back(VE.getTypeID(*I));
- AbbrevToUse = UnionAbbrev;
- break;
- }
case Type::ArrayTyID: {
const ArrayType *AT = cast<ArrayType>(T);
// ARRAY: [numelts, eltty]
@@ -299,21 +280,22 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
static unsigned getEncodedLinkage(const GlobalValue *GV) {
switch (GV->getLinkage()) {
default: llvm_unreachable("Invalid linkage!");
- case GlobalValue::ExternalLinkage: return 0;
- case GlobalValue::WeakAnyLinkage: return 1;
- case GlobalValue::AppendingLinkage: return 2;
- case GlobalValue::InternalLinkage: return 3;
- case GlobalValue::LinkOnceAnyLinkage: return 4;
- case GlobalValue::DLLImportLinkage: return 5;
- case GlobalValue::DLLExportLinkage: return 6;
- case GlobalValue::ExternalWeakLinkage: return 7;
- case GlobalValue::CommonLinkage: return 8;
- case GlobalValue::PrivateLinkage: return 9;
- case GlobalValue::WeakODRLinkage: return 10;
- case GlobalValue::LinkOnceODRLinkage: return 11;
- case GlobalValue::AvailableExternallyLinkage: return 12;
- case GlobalValue::LinkerPrivateLinkage: return 13;
- case GlobalValue::LinkerPrivateWeakLinkage: return 14;
+ case GlobalValue::ExternalLinkage: return 0;
+ case GlobalValue::WeakAnyLinkage: return 1;
+ case GlobalValue::AppendingLinkage: return 2;
+ case GlobalValue::InternalLinkage: return 3;
+ case GlobalValue::LinkOnceAnyLinkage: return 4;
+ case GlobalValue::DLLImportLinkage: return 5;
+ case GlobalValue::DLLExportLinkage: return 6;
+ case GlobalValue::ExternalWeakLinkage: return 7;
+ case GlobalValue::CommonLinkage: return 8;
+ case GlobalValue::PrivateLinkage: return 9;
+ case GlobalValue::WeakODRLinkage: return 10;
+ case GlobalValue::LinkOnceODRLinkage: return 11;
+ case GlobalValue::AvailableExternallyLinkage: return 12;
+ case GlobalValue::LinkerPrivateLinkage: return 13;
+ case GlobalValue::LinkerPrivateWeakLinkage: return 14;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15;
}
}
@@ -503,13 +485,14 @@ static void WriteMDNode(const MDNode *N,
Record.push_back(0);
}
}
- unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
- bitc::METADATA_NODE;
+ unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE2 :
+ bitc::METADATA_NODE2;
Stream.EmitRecord(MDCode, Record, 0);
Record.clear();
}
-static void WriteModuleMetadata(const ValueEnumerator &VE,
+static void WriteModuleMetadata(const Module *M,
+ const ValueEnumerator &VE,
BitstreamWriter &Stream) {
const ValueEnumerator::ValueList &Vals = VE.getMDValues();
bool StartedMetadataBlock = false;
@@ -544,29 +527,30 @@ static void WriteModuleMetadata(const ValueEnumerator &VE,
// Emit the finished record.
Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
Record.clear();
- } else if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(Vals[i].first)) {
- if (!StartedMetadataBlock) {
- Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
- StartedMetadataBlock = true;
- }
-
- // Write name.
- StringRef Str = NMD->getName();
- for (unsigned i = 0, e = Str.size(); i != e; ++i)
- Record.push_back(Str[i]);
- Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
- Record.clear();
+ }
+ }
- // Write named metadata operands.
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- if (NMD->getOperand(i))
- Record.push_back(VE.getValueID(NMD->getOperand(i)));
- else
- Record.push_back(~0U);
- }
- Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
- Record.clear();
+ // Write named metadata.
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ if (!StartedMetadataBlock) {
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ StartedMetadataBlock = true;
}
+
+ // Write name.
+ StringRef Str = NMD->getName();
+ for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ Record.push_back(Str[i]);
+ Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
+ Record.clear();
+
+ // Write named metadata operands.
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ Record.push_back(VE.getValueID(NMD->getOperand(i)));
+ Stream.EmitRecord(bitc::METADATA_NAMED_NODE2, Record, 0);
+ Record.clear();
}
if (StartedMetadataBlock)
@@ -601,7 +585,7 @@ static void WriteMetadataAttachment(const Function &F,
SmallVector<uint64_t, 64> Record;
// Write metadata attachments
- // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]
+ // METADATA_ATTACHMENT2 - [m x [value, [n x [id, mdnode]]]
SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -619,7 +603,7 @@ static void WriteMetadataAttachment(const Function &F,
Record.push_back(MDs[i].first);
Record.push_back(VE.getValueID(MDs[i].second));
}
- Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT2, Record, 0);
Record.clear();
}
@@ -634,12 +618,11 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
SmallVector<StringRef, 4> Names;
M->getMDKindNames(Names);
- assert(Names[0] == "" && "MDKind #0 is invalid");
- if (Names.size() == 1) return;
+ if (Names.empty()) return;
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
- for (unsigned MDKindID = 1, e = Names.size(); MDKindID != e; ++MDKindID) {
+ for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
Record.push_back(MDKindID);
StringRef KName = Names[MDKindID];
Record.append(KName.begin(), KName.end());
@@ -734,8 +717,8 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Code = bitc::CST_CODE_UNDEF;
} else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
if (IV->getBitWidth() <= 64) {
- int64_t V = IV->getSExtValue();
- if (V >= 0)
+ uint64_t V = IV->getSExtValue();
+ if ((int64_t)V >= 0)
Record.push_back(V << 1);
else
Record.push_back((-V << 1) | 1);
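
The record encoding stores the sign in bit 0 of the value; switching V to uint64_t makes the negation well defined even for INT64_MIN, whose magnitude still wraps under this scheme. A round-trip sketch for magnitudes below 2^63:

#include <cstdint>

static uint64_t encodeSigned(int64_t S) {
  uint64_t V = (uint64_t)S;
  return ((int64_t)V >= 0) ? (V << 1) : ((-V << 1) | 1);
}

static int64_t decodeSigned(uint64_t E) {
  return (E & 1) ? -(int64_t)(E >> 1) : (int64_t)(E >> 1);
}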
@@ -809,20 +792,6 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
Record.push_back(VE.getValueID(C->getOperand(i)));
AbbrevToUse = AggregateAbbrev;
- } else if (isa<ConstantUnion>(C)) {
- Code = bitc::CST_CODE_AGGREGATE;
-
- // Unions only have one entry but we must send type along with it.
- const Type *EntryKind = C->getOperand(0)->getType();
-
- const UnionType *UnTy = cast<UnionType>(C->getType());
- int UnionIndex = UnTy->getElementTypeIndex(EntryKind);
- assert(UnionIndex != -1 && "Constant union contains invalid entry");
-
- Record.push_back(UnionIndex);
- Record.push_back(VE.getValueID(C->getOperand(0)));
-
- AbbrevToUse = AggregateAbbrev;
} else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
switch (CE->getOpcode()) {
default:
@@ -902,6 +871,9 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Record.push_back(VE.getValueID(BA->getFunction()));
Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock()));
} else {
+#ifndef NDEBUG
+ C->dump();
+#endif
llvm_unreachable("Unknown constant!");
}
Stream.EmitRecord(Code, Record, AbbrevToUse);
@@ -1139,7 +1111,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- Code = bitc::FUNC_CODE_INST_CALL;
+ Code = bitc::FUNC_CODE_INST_CALL2;
Vals.push_back(VE.getAttributeID(CI.getAttributes()));
Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
@@ -1283,7 +1255,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
Vals.push_back(DL.getCol());
Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
- Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC2, Vals);
Vals.clear();
LastDL = DL;
@@ -1532,7 +1504,7 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
WriteModuleConstants(VE, Stream);
// Emit metadata.
- WriteModuleMetadata(VE, Stream);
+ WriteModuleMetadata(M, VE, Stream);
// Emit function bodies.
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I)
diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 3a0d3ce0be994..91e115cba6cc4 100644
--- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -21,7 +21,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit WriteBitcodePass(raw_ostream &o)
- : ModulePass(&ID), OS(o) {}
+ : ModulePass(ID), OS(o) {}
const char *getPassName() const { return "Bitcode Writer"; }
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 7fa425a7d871f..2f02262c36aff 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -75,7 +75,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
// Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
- EnumerateMDSymbolTable(M->getMDSymbolTable());
+ EnumerateNamedMetadata(M);
SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
@@ -137,7 +137,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
assert (I != InstructionMap.end() && "Instruction is not mapped!");
- return I->second;
+ return I->second;
}
void ValueEnumerator::setInstructionID(const Instruction *I) {
@@ -207,35 +207,48 @@ void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
EnumerateValue(VI->getValue());
}
-/// EnumerateMDSymbolTable - Insert all of the values in the specified metadata
-/// table.
-void ValueEnumerator::EnumerateMDSymbolTable(const MDSymbolTable &MST) {
- for (MDSymbolTable::const_iterator MI = MST.begin(), ME = MST.end();
- MI != ME; ++MI)
- EnumerateValue(MI->getValue());
+/// EnumerateNamedMetadata - Insert all of the values referenced by
+/// named metadata in the specified module.
+void ValueEnumerator::EnumerateNamedMetadata(const Module *M) {
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I)
+ EnumerateNamedMDNode(I);
}
void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
- // Check to see if it's already in!
- unsigned &MDValueID = MDValueMap[MD];
- if (MDValueID) {
- // Increment use count.
- MDValues[MDValueID-1].second++;
- return;
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
+ EnumerateMetadata(MD->getOperand(i));
+}
+
+/// EnumerateMDNodeOperands - Enumerate all non-function-local values
+/// and types referenced by the given MDNode.
+void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (Value *V = N->getOperand(i)) {
+ if (isa<MDNode>(V) || isa<MDString>(V))
+ EnumerateMetadata(V);
+ else if (!isa<Instruction>(V) && !isa<Argument>(V))
+ EnumerateValue(V);
+ } else
+ EnumerateType(Type::getVoidTy(N->getContext()));
}
+}
+
+void ValueEnumerator::EnumerateMetadata(const Value *MD) {
+ assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
// Enumerate the type of this value.
EnumerateType(MD->getType());
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
- if (MDNode *E = MD->getOperand(i))
- EnumerateValue(E);
- MDValues.push_back(std::make_pair(MD, 1U));
- MDValueMap[MD] = Values.size();
-}
+ const MDNode *N = dyn_cast<MDNode>(MD);
+
+ // In the module-level pass, skip function-local nodes themselves, but
+ // do walk their operands.
+ if (N && N->isFunctionLocal() && N->getFunction()) {
+ EnumerateMDNodeOperands(N);
+ return;
+ }
-void ValueEnumerator::EnumerateMetadata(const Value *MD) {
- assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
// Check to see if it's already in!
unsigned &MDValueID = MDValueMap[MD];
if (MDValueID) {
@@ -243,37 +256,52 @@ void ValueEnumerator::EnumerateMetadata(const Value *MD) {
MDValues[MDValueID-1].second++;
return;
}
+ MDValues.push_back(std::make_pair(MD, 1U));
+ MDValueID = MDValues.size();
+
+ // Enumerate all non-function-local operands.
+ if (N)
+ EnumerateMDNodeOperands(N);
+}
+
+/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
+/// information reachable from the given MDNode.
+void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) {
+ assert(N->isFunctionLocal() && N->getFunction() &&
+ "EnumerateFunctionLocalMetadata called on non-function-local mdnode!");
// Enumerate the type of this value.
- EnumerateType(MD->getType());
+ EnumerateType(N->getType());
- if (const MDNode *N = dyn_cast<MDNode>(MD)) {
- MDValues.push_back(std::make_pair(MD, 1U));
- MDValueMap[MD] = MDValues.size();
- MDValueID = MDValues.size();
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- if (Value *V = N->getOperand(i))
- EnumerateValue(V);
- else
- EnumerateType(Type::getVoidTy(MD->getContext()));
- }
- if (N->isFunctionLocal() && N->getFunction())
- FunctionLocalMDs.push_back(N);
+ // Check to see if it's already in!
+ unsigned &MDValueID = MDValueMap[N];
+ if (MDValueID) {
+ // Increment use count.
+ MDValues[MDValueID-1].second++;
return;
}
-
- // Add the value.
- assert(isa<MDString>(MD) && "Unknown metadata kind");
- MDValues.push_back(std::make_pair(MD, 1U));
+ MDValues.push_back(std::make_pair(N, 1U));
MDValueID = MDValues.size();
+
+ // To incorporate function-local information, visit all function-local
+ // MDNodes and all function-local values they reference.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ if (Value *V = N->getOperand(i)) {
+ if (MDNode *O = dyn_cast<MDNode>(V)) {
+ if (O->isFunctionLocal() && O->getFunction())
+ EnumerateFunctionLocalMetadata(O);
+ } else if (isa<Instruction>(V) || isa<Argument>(V))
+ EnumerateValue(V);
+ }
+
+ // Also, collect all function-local MDNodes for easy access.
+ FunctionLocalMDs.push_back(N);
}
void ValueEnumerator::EnumerateValue(const Value *V) {
assert(!V->getType()->isVoidTy() && "Can't insert void values!");
- if (isa<MDNode>(V) || isa<MDString>(V))
- return EnumerateMetadata(V);
- else if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(V))
- return EnumerateNamedMDNode(NMD);
+ assert(!isa<MDNode>(V) && !isa<MDString>(V) &&
+ "EnumerateValue doesn't handle Metadata!");
// Check to see if it's already in!
unsigned &ValueID = ValueMap[V];
@@ -359,7 +387,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
// blockaddress.
if (isa<BasicBlock>(Op)) continue;
- EnumerateOperandType(cast<Constant>(Op));
+ EnumerateOperandType(Op);
}
if (const MDNode *N = dyn_cast<MDNode>(V)) {
@@ -368,7 +396,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) {
EnumerateOperandType(Elem);
}
} else if (isa<MDString>(V) || isa<MDNode>(V))
- EnumerateValue(V);
+ EnumerateMetadata(V);
}
void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
@@ -386,10 +414,11 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
void ValueEnumerator::incorporateFunction(const Function &F) {
InstructionCount = 0;
NumModuleValues = Values.size();
+ NumModuleMDValues = MDValues.size();
// Adding function arguments to the value table.
- for(Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
- I != E; ++I)
+ for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+ I != E; ++I)
EnumerateValue(I);
FirstFuncConstantID = Values.size();
@@ -416,7 +445,6 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
FirstInstID = Values.size();
- FunctionLocalMDs.clear();
SmallVector<MDNode *, 8> FnLocalMDVector;
// Add all of the instructions.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -428,6 +456,15 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
// Enumerate metadata after the instructions they might refer to.
FnLocalMDVector.push_back(MD);
}
+
+ SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+ I->getAllMetadataOtherThanDebugLoc(MDs);
+ for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+ MDNode *N = MDs[i].second;
+ if (N->isFunctionLocal() && N->getFunction())
+ FnLocalMDVector.push_back(N);
+ }
+
if (!I->getType()->isVoidTy())
EnumerateValue(I);
}
@@ -435,18 +472,22 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
// Add all of the function-local metadata.
for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
- EnumerateOperandType(FnLocalMDVector[i]);
+ EnumerateFunctionLocalMetadata(FnLocalMDVector[i]);
}
void ValueEnumerator::purgeFunction() {
/// Remove purged values from the ValueMap.
for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
ValueMap.erase(Values[i].first);
+ for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i)
+ MDValueMap.erase(MDValues[i].first);
for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
ValueMap.erase(BasicBlocks[i]);
Values.resize(NumModuleValues);
+ MDValues.resize(NumModuleMDValues);
BasicBlocks.clear();
+ FunctionLocalMDs.clear();
}
static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
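
The snapshot/restore discipline now applied to MDValues, sketched with simplified types (the real list pairs each value with a use count):

#include <map>
#include <utility>
#include <vector>

typedef std::vector<std::pair<const void*, unsigned> > MDValList;

static void purgeMD(MDValList &MDValues,
                    std::map<const void*, unsigned> &MDValueMap,
                    unsigned NumModuleMDValues) {
  // Erase map entries for everything enumerated since incorporateFunction
  // snapshotted the list size, then shrink the list back to that snapshot.
  for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i)
    MDValueMap.erase(MDValues[i].first);
  MDValues.resize(NumModuleMDValues);
}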
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 2b9b15fa5a77a..cd1d2371b701a 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -72,6 +72,11 @@ private:
/// When a function is incorporated, this is the size of the Values list
/// before incorporation.
unsigned NumModuleValues;
+
+ /// When a function is incorporated, this is the size of the MDValues list
+ /// before incorporation.
+ unsigned NumModuleMDValues;
+
unsigned FirstFuncConstantID;
unsigned FirstInstID;
@@ -132,7 +137,9 @@ public:
private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
+ void EnumerateMDNodeOperands(const MDNode *N);
void EnumerateMetadata(const Value *MD);
+ void EnumerateFunctionLocalMetadata(const MDNode *N);
void EnumerateNamedMDNode(const NamedMDNode *NMD);
void EnumerateValue(const Value *V);
void EnumerateType(const Type *T);
@@ -141,7 +148,7 @@ private:
void EnumerateTypeSymbolTable(const TypeSymbolTable &ST);
void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
- void EnumerateMDSymbolTable(const MDSymbolTable &ST);
+ void EnumerateNamedMetadata(const Module *M);
};
} // End llvm namespace
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index a7189acc3fecd..5a634d6ccb018 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -41,8 +41,11 @@ DebugMod("agg-antidep-debugmod",
AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
MachineBasicBlock *BB) :
- NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0) {
-
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0),
+ GroupNodeIndices(TargetRegs, 0),
+ KillIndices(TargetRegs, 0),
+ DefIndices(TargetRegs, 0)
+{
const unsigned BBSize = BB->size();
for (unsigned i = 0; i < NumTargetRegs; ++i) {
// Initialize all registers to be in their own group. Initially we
@@ -54,8 +57,7 @@ AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
}
}
-unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
-{
+unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) {
unsigned Node = GroupNodeIndices[Reg];
while (GroupNodes[Node] != Node)
Node = GroupNodes[Node];
@@ -145,8 +147,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
// Determine the live-out physregs for this block.
if (IsReturnBlock) {
@@ -226,7 +228,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
DEBUG(MI->dump());
DEBUG(dbgs() << "\tRegs:");
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
// If Reg is current live, then mark that it can't be renamed as
// we don't know the extent of its live-range anymore (now that it
@@ -328,8 +330,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
const char *tag,
const char *header,
const char *footer) {
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -364,7 +366,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
unsigned Count,
std::set<unsigned>& PassthruRegs) {
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -560,8 +562,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
unsigned AntiDepGroupIndex,
RenameOrderType& RenameOrder,
std::map<unsigned, unsigned> &RenameMap) {
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
@@ -652,6 +654,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
if (R == RB) R = RE;
--R;
const unsigned NewSuperReg = *R;
+ // Don't consider non-allocatable registers
+ if (!AllocatableSet.test(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -733,8 +737,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
unsigned InsertPosIndex) {
- unsigned *KillIndices = State->GetKillIndices();
- unsigned *DefIndices = State->GetDefIndices();
+ std::vector<unsigned> &KillIndices = State->GetKillIndices();
+ std::vector<unsigned> &DefIndices = State->GetDefIndices();
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 91ebb850d19db..9d715ccf79f8d 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -59,27 +59,27 @@ namespace llvm {
/// currently representing the group that the register belongs to.
/// Register 0 is always represented by the 0 group, a group
/// composed of registers that are not eligible for anti-aliasing.
- unsigned GroupNodeIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> GroupNodeIndices;
/// RegRefs - Map registers to all their references within a live range.
std::multimap<unsigned, RegisterReference> RegRefs;
/// KillIndices - The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
- unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> KillIndices;
/// DefIndices - The index of the most recent complete def (proceeding bottom
/// up), or ~0u if the register is live.
- unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> DefIndices;
public:
AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
/// GetKillIndices - Return the kill indices.
- unsigned *GetKillIndices() { return KillIndices; }
+ std::vector<unsigned> &GetKillIndices() { return KillIndices; }
/// GetDefIndices - Return the define indices.
- unsigned *GetDefIndices() { return DefIndices; }
+ std::vector<unsigned> &GetDefIndices() { return DefIndices; }
/// GetRegRefs - Return the RegRefs map.
std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
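
GetGroup above is the find half of a union-find forest: a register whose GroupNodes entry points at itself is its group's representative. A standalone sketch of the walk (no path compression, matching the code):

#include <vector>

static unsigned findGroup(const std::vector<unsigned> &GroupNodes,
                          unsigned Node) {
  while (GroupNodes[Node] != Node) // follow parent links ...
    Node = GroupNodes[Node];
  return Node;                     // ... until a self-parent root
}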
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index f71eee5d01b89..e3dd646c952ed 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -109,7 +109,7 @@ GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
V = V->stripPointerCasts();
GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
- if (GV && GV->getName() == ".llvm.eh.catch.all.value") {
+ if (GV && GV->getName() == "llvm.eh.catch.all.value") {
assert(GV->hasInitializer() &&
"The EH catch-all value must have an initializer");
Value *Init = GV->getInitializer();
@@ -171,7 +171,7 @@ ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
FOC = FPC = ISD::SETFALSE;
break;
}
- if (FiniteOnlyFPMath())
+ if (NoNaNsFPMath)
return FOC;
else
return FPC;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index db1b37ab263fb..d358ab20ffc53 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -91,7 +91,7 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
- : MachineFunctionPass(&ID),
+ : MachineFunctionPass(ID),
TM(tm), MAI(tm.getMCAsmInfo()),
OutContext(Streamer.getContext()),
OutStreamer(Streamer),
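
The constructor change from MachineFunctionPass(&ID) to MachineFunctionPass(ID) tracks LLVM's switch to passing the pass-identity object by reference. The pattern itself, sketched in plain C++ (not the real PassSupport machinery):

    // Each pass class owns one static char; its *address* is the unique key.
    struct PassBase {
      const void *PassID;
      explicit PassBase(char &ID) : PassID(&ID) {} // take the ID by reference
    };

    struct MyPass : PassBase {
      static char ID; // identity object, value irrelevant
      MyPass() : PassBase(ID) {}
    };
    char MyPass::ID = 0;
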
@@ -200,11 +200,17 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::LinkerPrivateWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
if (MAI->getWeakDefDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
- // .weak_definition _foo
- OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+
+ if ((GlobalValue::LinkageTypes)Linkage !=
+ GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ // .weak_definition _foo
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+ else
+ OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
} else if (MAI->getLinkOnceDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
@@ -510,12 +516,8 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
}
// Check for spill-induced copies
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg,
- SrcSubIdx, DstSubIdx)) {
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
- CommentOS << " Reload Reuse\n";
- }
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
}
/// EmitImplicitDef - This method emits the specified machine instruction
@@ -603,12 +605,15 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
+ const MachineInstr *LastMI = 0;
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
// Print a label for the basic block.
EmitBasicBlockStart(I);
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
+ LastMI = II;
+
// Print the assembly for the instruction.
if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
!II->isDebugValue()) {
@@ -625,7 +630,7 @@ void AsmPrinter::EmitFunctionBody() {
EmitComments(*II, OutStreamer.GetCommentOS());
switch (II->getOpcode()) {
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
@@ -656,11 +661,18 @@ void AsmPrinter::EmitFunctionBody() {
}
}
}
-
+
+ // If the last instruction was a prolog label, then we have a situation where
+ // we emitted a prolog but no function body. This results in the ending prolog
+ // label equaling the end of function label and an invalid "row" in the
+ // FDE. We need to emit a noop in this situation so that the FDE's rows are
+ // valid.
+ bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+
// If the function is empty and the object file uses .subsections_via_symbols,
// then we need to emit *something* to the function body to prevent the
// labels from collapsing together. Just emit a noop.
- if (MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) {
+ if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
MCInst Noop;
TM.getInstrInfo()->getNoopForMachoTarget(Noop);
if (Noop.getOpcode()) {
@@ -1206,6 +1218,22 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
}
}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label. This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+                                     unsigned Size) const {
+
+ // Emit Label+Offset
+ const MCExpr *Plus =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
+ MCConstantExpr::Create(Offset, OutContext),
+ OutContext);
+
+  OutStreamer.EmitValue(Plus, Size, 0/*AddrSpace*/);
+}
//===----------------------------------------------------------------------===//
@@ -1244,6 +1272,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
@@ -1262,10 +1291,17 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
if (C != CE)
return LowerConstant(C, AP);
-#ifndef NDEBUG
- CE->dump();
-#endif
- llvm_unreachable("FIXME: Don't support this constant expr");
+
+ // Otherwise report the problem to the user.
+ {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ WriteAsOperand(OS, CE, /*PrintType=*/false,
+ !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
+ return MCConstantExpr::Create(0, Ctx);
case Instruction::GetElementPtr: {
const TargetData &TD = *AP.TM.getTargetData();
// Generate a symbolic expression for the byte address
@@ -1413,21 +1449,6 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS,
"Layout of constant struct may be incorrect!");
}
-static void EmitGlobalConstantUnion(const ConstantUnion *CU,
- unsigned AddrSpace, AsmPrinter &AP) {
- const TargetData *TD = AP.TM.getTargetData();
- unsigned Size = TD->getTypeAllocSize(CU->getType());
-
- const Constant *Contents = CU->getOperand(0);
- unsigned FilledSize = TD->getTypeAllocSize(Contents->getType());
-
- // Print the actually filled part
- EmitGlobalConstantImpl(Contents, AddrSpace, AP);
-
- // And pad with enough zeroes
- AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace);
-}
-
static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
AsmPrinter &AP) {
// FP Constants are printed as integer constants to avoid losing
@@ -1530,7 +1551,7 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
case 8:
if (AP.isVerbose())
AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
- AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+ AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
return;
default:
EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
@@ -1553,9 +1574,6 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
return;
}
- if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV))
- return EmitGlobalConstantUnion(CVU, AddrSpace, AP);
-
if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
return EmitGlobalConstantVector(V, AddrSpace, AP);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index b310578584bc9..ce4519c541e3b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -36,7 +36,7 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128()) {
+ if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) {
// FIXME: MCize.
OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value));
return;
@@ -61,7 +61,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128() && PadTo == 0) {
+ if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) {
// FIXME: MCize.
OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value));
return;
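
The extra hasRawTextSupport() condition matters because .uleb128/.sleb128 are textual directives; an object-file streamer has no raw-text channel, so the code must fall through to the byte-at-a-time emission path (the FIXME's "MCize"). For reference, a minimal stand-alone ULEB128 encoder, not the LLVM one:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // ULEB128: 7 data bits per byte, high bit set on every byte but the last.
    std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }

    int main() {
      for (uint8_t B : encodeULEB128(624485)) // DWARF's worked example: e5 8e 26
        std::printf("%02x ", (unsigned)B);
      std::printf("\n");
    }
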
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 202d9b67fd157..df0316814c08b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -22,7 +22,6 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/AsmParser.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
@@ -72,16 +71,18 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
- AsmParser Parser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI);
- OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser));
+ OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
+ OutContext, OutStreamer,
+ *MAI));
+ OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
- Parser.setTargetParser(*TAP.get());
+ Parser->setTargetParser(*TAP.get());
// Don't implicitly switch to the text section before the asm.
- int Res = Parser.Run(/*NoInitialTextSection*/ true,
- /*NoFinalize*/ true);
+ int Res = Parser->Run(/*NoInitialTextSection*/ true,
+ /*NoFinalize*/ true);
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
}
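
The rewrite above stops instantiating a concrete AsmParser on the stack and instead asks a factory for an abstract MCAsmParser held by an owning pointer, which is why the concrete parser's header could be dropped from the includes. The general shape, sketched with std::unique_ptr standing in for OwningPtr:

    #include <memory>

    struct Parser {                // abstract interface, like MCAsmParser
      virtual ~Parser() {}
      virtual int run() = 0;
    };

    struct ConcreteParser : Parser {
      int run() { return 0; }
    };

    // The factory hides the concrete type from callers, like createMCAsmParser.
    std::unique_ptr<Parser> createParser() {
      return std::unique_ptr<Parser>(new ConcreteParser());
    }

    int main() {
      std::unique_ptr<Parser> P = createParser();
      return P->run();
    }
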
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 65c1d190216fa..c886a5ecc6153 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -44,7 +44,7 @@ using namespace llvm;
static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
cl::desc("Print DbgScope information for each machine instruction"));
-static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
cl::Hidden,
cl::desc("Disable debug info printing"));
@@ -116,8 +116,8 @@ public:
/// addGlobalType - Add a new global type to the compile unit.
///
- void addGlobalType(StringRef Name, DIE *Die) {
- GlobalTypes[Name] = Die;
+ void addGlobalType(StringRef Name, DIE *Die) {
+ GlobalTypes[Name] = Die;
}
/// getDIE - Returns the debug information entry map slot for the
@@ -131,8 +131,9 @@ public:
  /// getDIEEntry - Returns the debug information entry for the specified
/// debug variable.
- DIEEntry *getDIEEntry(const MDNode *N) {
- DenseMap<const MDNode *, DIEEntry *>::iterator I = MDNodeToDIEEntryMap.find(N);
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I =
+ MDNodeToDIEEntryMap.find(N);
if (I == MDNodeToDIEEntryMap.end())
return NULL;
return I->second;
@@ -179,6 +180,73 @@ public:
DIE *getDIE() const { return TheDIE; }
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+ StringRef getName() const { return Var.getName(); }
+ unsigned getTag() const { return Var.getTag(); }
+ bool variableHasComplexAddress() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.hasComplexAddress();
+ }
+ bool isBlockByrefVariable() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.isBlockByrefVariable();
+ }
+ unsigned getNumAddrElements() const {
+ assert(Var.Verify() && "Invalid complex DbgVariable!");
+ return Var.getNumAddrElements();
+ }
+ uint64_t getAddrElement(unsigned i) const {
+ return Var.getAddrElement(i);
+ }
+ DIType getType() const {
+ DIType Ty = Var.getType();
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (Var.isBlockByrefVariable()) {
+ /* Byref variables, in Blocks, are declared by the programmer as
+ "SomeType VarName;", but the compiler creates a
+ __Block_byref_x_VarName struct, and gives the variable VarName
+ either the struct, or a pointer to the struct, as its type. This
+ is necessary for various behind-the-scenes things the compiler
+ needs to do with by-reference variables in blocks.
+
+ However, as far as the original *programmer* is concerned, the
+ variable should still have type 'SomeType', as originally declared.
+
+ The following function dives into the __Block_byref_x_VarName
+ struct to find the original type of the variable. This will be
+ passed back to the code generating the type for the Debug
+ Information Entry for the variable 'VarName'. 'VarName' will then
+ have the original type 'SomeType' in its debug information.
+
+ The original type 'SomeType' will be the type of the field named
+ 'VarName' inside the __Block_byref_x_VarName struct.
+
+ NOTE: In order for this to not completely fail on the debugger
+ side, the Debug Information Entry for the variable VarName needs to
+ have a DW_AT_location that tells the debugger how to unwind through
+ the pointers and __Block_byref_x_VarName struct to find the actual
+ value of the variable. The function addBlockByrefType does this. */
+ DIType subType = Ty;
+ unsigned tag = Ty.getTag();
+
+ if (tag == dwarf::DW_TAG_pointer_type) {
+ DIDerivedType DTy = DIDerivedType(Ty);
+ subType = DTy.getTypeDerivedFrom();
+ }
+
+ DICompositeType blockStruct = DICompositeType(subType);
+ DIArray Elements = blockStruct.getTypeArray();
+
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ DIDerivedType DT = DIDerivedType(Element);
+ if (getName() == DT.getName())
+ return (DT.getTypeDerivedFrom());
+ }
+ return Ty;
+ }
+ return Ty;
+ }
};
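
The long comment moved into getType() above describes a two-step unwrap: strip an optional pointer, then search the __Block_byref struct's members for the field named after the variable and return that field's type. A toy model of the lookup (hypothetical Type nodes, not the DebugInfo classes):

    #include <string>
    #include <utility>
    #include <vector>

    struct Type {
      std::string Name;
      Type *PointeeOrNull;                                  // non-null for pointer types
      std::vector<std::pair<std::string, Type *> > Members; // set for struct types
    };

    // Return the programmer-visible type of VarName, whose compiler-generated
    // type Ty is a __Block_byref struct or a pointer to one; fall back to Ty
    // when no matching member is found.
    Type *getBlockByrefType(Type *Ty, const std::string &VarName) {
      Type *SubTy = Ty->PointeeOrNull ? Ty->PointeeOrNull : Ty;
      for (unsigned i = 0, N = SubTy->Members.size(); i != N; ++i)
        if (SubTy->Members[i].first == VarName)
          return SubTy->Members[i].second;
      return Ty;
    }
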
//===----------------------------------------------------------------------===//
@@ -194,7 +262,7 @@ class DbgScope {
DbgScope *Parent; // Parent to this scope.
DIDescriptor Desc; // Debug info descriptor for scope.
// Location at which this scope is inlined.
- AssertingVH<const MDNode> InlinedAtLocation;
+ AssertingVH<const MDNode> InlinedAtLocation;
bool AbstractScope; // Abstract Scope
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
@@ -220,19 +288,19 @@ public:
const MDNode *getInlinedAt() const { return InlinedAtLocation; }
const MDNode *getScopeNode() const { return Desc; }
const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
- const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
+ const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; }
const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
/// openInsnRange - This scope covers instruction range starting from MI.
void openInsnRange(const MachineInstr *MI) {
- if (!FirstInsn)
+ if (!FirstInsn)
FirstInsn = MI;
-
+
if (Parent)
Parent->openInsnRange(MI);
}
- /// extendInsnRange - Extend the current instruction range covered by
+ /// extendInsnRange - Extend the current instruction range covered by
/// this scope.
void extendInsnRange(const MachineInstr *MI) {
assert (FirstInsn && "MI Range is not open!");
@@ -247,9 +315,9 @@ public:
void closeInsnRange(DbgScope *NewScope = NULL) {
assert (LastInsn && "Last insn missing!");
Ranges.push_back(DbgRange(FirstInsn, LastInsn));
- FirstInsn = NULL;
+ FirstInsn = NULL;
LastInsn = NULL;
- // If Parent dominates NewScope then do not close Parent's instruction
+ // If Parent dominates NewScope then do not close Parent's instruction
// range.
if (Parent && (!NewScope || !Parent->dominates(NewScope)))
Parent->closeInsnRange(NewScope);
@@ -264,7 +332,7 @@ public:
unsigned getDFSIn() const { return DFSIn; }
void setDFSIn(unsigned I) { DFSIn = I; }
bool dominates(const DbgScope *S) {
- if (S == this)
+ if (S == this)
return true;
if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut())
return true;
@@ -313,14 +381,13 @@ DbgScope::~DbgScope() {
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: Asm(A), MMI(Asm->MMI), FirstCU(0),
- AbbreviationsSet(InitAbbreviationsSetSize),
+ AbbreviationsSet(InitAbbreviationsSetSize),
CurrentFnDbgScope(0), PrevLabel(NULL) {
NextStringPoolNumber = 0;
-
+
DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
- DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
- DwarfDebugLineSectionSym = CurrentLineSectionSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
{
@@ -377,7 +444,7 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
unsigned Form, uint64_t Integer) {
if (!Form) Form = DIEInteger::BestForm(false, Integer);
- DIEValue *Value = Integer == 1 ?
+ DIEValue *Value = Integer == 1 ?
DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
Die->addValue(Attribute, Form, Value);
}
@@ -392,7 +459,7 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
}
/// addString - Add a string attribute data and value. DIEString only
-/// keeps string reference.
+/// keeps string reference.
void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
StringRef String) {
DIEValue *Value = new (DIEValueAllocator) DIEString(String);
@@ -434,14 +501,14 @@ void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
+void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) {
// Verify variable.
- if (!V->Verify())
+ if (!V.Verify())
return;
- unsigned Line = V->getLineNumber();
- unsigned FileID = GetOrCreateSourceID(V->getContext().getDirectory(),
- V->getContext().getFilename());
+ unsigned Line = V.getLineNumber();
+ unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(),
+ V.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -449,14 +516,14 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) {
+void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) {
// Verify global variable.
- if (!G->Verify())
+ if (!G.Verify())
return;
- unsigned Line = G->getLineNumber();
- unsigned FileID = GetOrCreateSourceID(G->getContext().getDirectory(),
- G->getContext().getFilename());
+ unsigned Line = G.getLineNumber();
+ unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(),
+ G.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -464,19 +531,19 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
+void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) {
// Verify subprogram.
- if (!SP->Verify())
+ if (!SP.Verify())
return;
// If the line number is 0, don't add it.
- if (SP->getLineNumber() == 0)
+ if (SP.getLineNumber() == 0)
return;
- unsigned Line = SP->getLineNumber();
- if (!SP->getContext().Verify())
+ unsigned Line = SP.getLineNumber();
+ if (!SP.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(SP->getDirectory(),
- SP->getFilename());
+ unsigned FileID = GetOrCreateSourceID(SP.getDirectory(),
+ SP.getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -484,16 +551,16 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
+void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) {
// Verify type.
- if (!Ty->Verify())
+ if (!Ty.Verify())
return;
- unsigned Line = Ty->getLineNumber();
- if (!Ty->getContext().Verify())
+ unsigned Line = Ty.getLineNumber();
+ if (!Ty.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(Ty->getContext().getDirectory(),
- Ty->getContext().getFilename());
+ unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(),
+ Ty.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -501,14 +568,14 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
+void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
// Verify namespace.
- if (!NS->Verify())
+ if (!NS.Verify())
return;
- unsigned Line = NS->getLineNumber();
- StringRef FN = NS->getFilename();
- StringRef Dir = NS->getDirectory();
+ unsigned Line = NS.getLineNumber();
+ StringRef FN = NS.getFilename();
+ StringRef Dir = NS.getDirectory();
unsigned FileID = GetOrCreateSourceID(Dir, FN);
assert(FileID && "Invalid file id");
@@ -516,55 +583,21 @@ void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
}
-/* Byref variables, in Blocks, are declared by the programmer as
- "SomeType VarName;", but the compiler creates a
- __Block_byref_x_VarName struct, and gives the variable VarName
- either the struct, or a pointer to the struct, as its type. This
- is necessary for various behind-the-scenes things the compiler
- needs to do with by-reference variables in blocks.
-
- However, as far as the original *programmer* is concerned, the
- variable should still have type 'SomeType', as originally declared.
-
- The following function dives into the __Block_byref_x_VarName
- struct to find the original type of the variable. This will be
- passed back to the code generating the type for the Debug
- Information Entry for the variable 'VarName'. 'VarName' will then
- have the original type 'SomeType' in its debug information.
-
- The original type 'SomeType' will be the type of the field named
- 'VarName' inside the __Block_byref_x_VarName struct.
-
- NOTE: In order for this to not completely fail on the debugger
- side, the Debug Information Entry for the variable VarName needs to
- have a DW_AT_location that tells the debugger how to unwind through
- the pointers and __Block_byref_x_VarName struct to find the actual
- value of the variable. The function addBlockByrefType does this. */
-
-/// Find the type the programmer originally declared the variable to be
-/// and return that type.
-///
-DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
-
- DIType subType = Ty;
- unsigned tag = Ty.getTag();
-
- if (tag == dwarf::DW_TAG_pointer_type) {
- DIDerivedType DTy = DIDerivedType(Ty);
- subType = DTy.getTypeDerivedFrom();
- }
-
- DICompositeType blockStruct = DICompositeType(subType);
- DIArray Elements = blockStruct.getTypeArray();
-
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- DIDerivedType DT = DIDerivedType(Element);
- if (Name == DT.getName())
- return (DT.getTypeDerivedFrom());
- }
+/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based
+/// on provided frame index.
+void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) {
+ MachineLocation Location;
+ unsigned FrameReg;
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ Location.set(FrameReg, Offset);
- return Ty;
+ if (DV->variableHasComplexAddress())
+ addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else if (DV->isBlockByrefVariable())
+ addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location);
+ else
+ addAddress(Die, dwarf::DW_AT_location, Location);
}
/// addComplexAddress - Start with the address based on the location provided,
@@ -575,8 +608,7 @@ DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
unsigned Attribute,
const MachineLocation &Location) {
- const DIVariable &VD = DV->getVariable();
- DIType Ty = VD.getType();
+ DIType Ty = DV->getType();
// Decode the original location, and use that as the start of the byref
// variable's location.
@@ -603,12 +635,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
}
- for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) {
- uint64_t Element = VD.getAddrElement(i);
+ for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) {
+ uint64_t Element = DV->getAddrElement(i);
if (Element == DIFactory::OpPlus) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i));
+ addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i));
} else if (Element == DIFactory::OpDeref) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
} else llvm_unreachable("unknown DIFactory Opcode");
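
The loop above lowers the variable's complex-address elements into a DWARF expression one opcode at a time. A stand-alone rendition using the real opcode values (DW_OP_deref is 0x06, DW_OP_plus_uconst is 0x23); the element tags are stand-ins for DIFactory's, and operand ULEB128 encoding is elided (see the encoder sketch earlier in this section):

    #include <cstdint>
    #include <vector>

    enum { DW_OP_deref = 0x06, DW_OP_plus_uconst = 0x23 }; // real DWARF values
    enum { OpPlus = 1, OpDeref = 2 };                      // stand-in element tags

    // Append expression bytes for a (tag, operand) element list; operands are
    // assumed small enough to fit one byte for this sketch.
    void lowerComplexAddress(const std::vector<uint64_t> &Elems,
                             std::vector<uint8_t> &Expr) {
      for (unsigned i = 0, N = Elems.size(); i != N; ++i) {
        if (Elems[i] == OpPlus) {
          Expr.push_back(DW_OP_plus_uconst);
          Expr.push_back((uint8_t)Elems[++i]); // operand follows its tag
        } else if (Elems[i] == OpDeref) {
          Expr.push_back(DW_OP_deref);
        }
      }
    }
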
@@ -681,13 +713,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
unsigned Attribute,
const MachineLocation &Location) {
- const DIVariable &VD = DV->getVariable();
- DIType Ty = VD.getType();
+ DIType Ty = DV->getType();
DIType TmpTy = Ty;
unsigned Tag = Ty.getTag();
bool isPointer = false;
- StringRef varName = VD.getName();
+ StringRef varName = DV->getName();
if (Tag == dwarf::DW_TAG_pointer_type) {
DIDerivedType DTy = DIDerivedType(Ty);
@@ -835,26 +866,26 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
assert (MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
APFloat FPImm = MO.getFPImm()->getValueAPF();
-
+
// Get the raw data form of the floating point.
const APInt FltVal = FPImm.bitcastToAPInt();
const char *FltPtr = (const char*)FltVal.getRawData();
-
+
int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
bool LittleEndian = Asm->getTargetData().isLittleEndian();
int Incr = (LittleEndian ? 1 : -1);
int Start = (LittleEndian ? 0 : NumBytes - 1);
int Stop = (LittleEndian ? NumBytes : -1);
-
+
// Output the constant to DWARF one byte at a time.
for (; Start != Stop; Start += Incr)
addUInt(Block, 0, dwarf::DW_FORM_data1,
(unsigned char)0xFF & FltPtr[Start]);
-
+
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
if (VS)
addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
- return true;
+ return true;
}
@@ -872,7 +903,7 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
ContextDIE->addChild(Die);
} else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
ContextDIE->addChild(Die);
- else
+ else
getCompileUnit(Context)->addDie(Die);
}
@@ -965,7 +996,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy.isForwardDecl())
- addSourceLine(&Buffer, &DTy);
+ addSourceLine(&Buffer, DTy);
}
/// constructTypeDIE - Construct type DIE from DICompositeType.
@@ -1039,7 +1070,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addType(ElemDie, DV.getType());
addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- addSourceLine(ElemDie, &DV);
+ addSourceLine(ElemDie, DV);
} else if (Element.isDerivedType())
ElemDie = createMemberDIE(DIDerivedType(Element));
else
@@ -1057,7 +1088,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DICompositeType ContainingType = CTy.getContainingType();
if (DIDescriptor(ContainingType).isCompositeType())
- addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+ addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
getOrCreateTypeDIE(DIType(ContainingType)));
else {
DIDescriptor Context = CTy.getContext();
@@ -1073,7 +1104,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (!Name.empty())
addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
+ if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
|| Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
{
// Add size if non-zero (derived types might be zero-sized.)
@@ -1089,7 +1120,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add source line info if available.
if (!CTy.isForwardDecl())
- addSourceLine(&Buffer, &CTy);
+ addSourceLine(&Buffer, CTy);
}
}
@@ -1149,7 +1180,7 @@ DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) {
return Enumerator;
}
-/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
+/// getRealLinkageName - If special LLVM prefix that is used to inform the asm
/// printer to not emit usual symbol prefix before the symbol name is used then
/// return linkage name after skipping this special LLVM prefix.
static StringRef getRealLinkageName(StringRef LinkageName) {
@@ -1159,40 +1190,16 @@ static StringRef getRealLinkageName(StringRef LinkageName) {
return LinkageName;
}
-/// createGlobalVariableDIE - Create new DIE using GV.
-DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
- // If the global variable was optmized out then no need to create debug info
- // entry.
- if (!GV.getGlobal()) return NULL;
- if (GV.getDisplayName().empty()) return NULL;
-
- DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
- addString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
- GV.getDisplayName());
-
- StringRef LinkageName = GV.getLinkageName();
- if (!LinkageName.empty())
- addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
- getRealLinkageName(LinkageName));
-
- addType(GVDie, GV.getType());
- if (!GV.isLocalToUnit())
- addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- addSourceLine(GVDie, &GV);
-
- return GVDie;
-}
-
/// createMemberDIE - Create new member DIE.
-DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
+DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
DIE *MemberDie = new DIE(DT.getTag());
StringRef Name = DT.getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
+
addType(MemberDie, DT.getTypeDerivedFrom());
- addSourceLine(MemberDie, &DT);
+ addSourceLine(MemberDie, DT);
DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
@@ -1240,7 +1247,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+ addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
VBaseLocationDie);
} else
addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
@@ -1261,7 +1268,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
}
/// createSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
CompileUnit *SPCU = getCompileUnit(SP);
DIE *SPDie = SPCU->getDIE(SP);
if (SPDie)
@@ -1277,7 +1284,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
getRealLinkageName(LinkageName));
- addSourceLine(SPDie, &SP);
+ addSourceLine(SPDie, SP);
// Add prototyped tag, if C or ObjC.
unsigned Lang = SP.getCompileUnit().getLanguage();
@@ -1302,7 +1309,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
- ContainingTypeMap.insert(std::make_pair(SPDie,
+ ContainingTypeMap.insert(std::make_pair(SPDie,
SP.getContainingType()));
}
@@ -1331,10 +1338,14 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
if (!SP.isLocalToUnit())
addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-
+
if (SP.isOptimized())
addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+ if (unsigned isa = Asm->getISAEncoding()) {
+ addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+ }
+
// DW_TAG_inlined_subroutine may refer to this DIE.
SPCU->insertDIE(SP, SPDie);
@@ -1394,18 +1405,18 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
assert(SPDie && "Unable to find subprogram DIE!");
DISubprogram SP(SPNode);
-
+
   // There is no need to generate a specification DIE for a function
   // defined at compile unit level. If a function is defined inside another
   // function then gdb prefers the definition at the top level but does not
- // expect specification DIE in parent function. So avoid creating
+ // expect specification DIE in parent function. So avoid creating
// specification DIE for a function defined inside a function.
if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
- !SP.getContext().isFile() &&
+ !SP.getContext().isFile() &&
!isSubprogramContext(SP.getContext())) {
addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
-
- // Add arguments.
+
+ // Add arguments.
DICompositeType SPTy = SP.getType();
DIArray Args = SPTy.getTypeArray();
unsigned SPTag = SPTy.getTag();
@@ -1420,11 +1431,11 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
}
DIE *SPDeclDie = SPDie;
SPDie = new DIE(dwarf::DW_TAG_subprogram);
- addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
SPDeclDie);
SPCU->addDie(SPDie);
}
-
+
// Pick up abstract subprogram DIE.
if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
SPDie = new DIE(dwarf::DW_TAG_subprogram);
@@ -1459,7 +1470,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
if (Ranges.size() > 1) {
// .debug_range section has not been laid out yet. Emit offset in
- // .debug_range as a uint, size 4, for now. emitDIE will handle
+ // .debug_range as a uint, size 4, for now. emitDIE will handle
// DW_AT_ranges appropriately.
addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
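
As the comment notes, .debug_ranges has not been laid out yet, so the DIE records a provisional byte offset derived from how many range symbols are already queued, each of which will occupy one address-sized slot. The arithmetic, isolated as a sketch:

    #include <cstddef>
    #include <vector>

    // Offset of the next range list = queued slots * bytes per address.
    size_t debugRangesOffset(const std::vector<const void *> &DebugRangeSymbols,
                             size_t PointerSize) {
      return DebugRangeSymbols.size() * PointerSize;
    }
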
@@ -1480,7 +1491,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
assert(End->isDefined() && "Invalid end label for an inlined scope!");
-
+
addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
@@ -1493,7 +1504,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
- assert (Ranges.empty() == false
+ assert (Ranges.empty() == false
&& "DbgScope does not have instruction markers!");
// FIXME : .debug_inlined section specification does not clearly state how
@@ -1551,16 +1562,14 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
- // Get the descriptor.
- const DIVariable &VD = DV->getVariable();
- StringRef Name = VD.getName();
+ StringRef Name = DV->getName();
if (Name.empty())
return NULL;
// Translate tag to proper Dwarf tag. The result variable is dropped for
// now.
unsigned Tag;
- switch (VD.getTag()) {
+ switch (DV->getTag()) {
case dwarf::DW_TAG_return_variable:
return NULL;
case dwarf::DW_TAG_arg_variable:
@@ -1586,18 +1595,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
dwarf::DW_FORM_ref4, AbsDIE);
else {
addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- addSourceLine(VariableDie, &VD);
+ addSourceLine(VariableDie, DV->getVariable());
// Add variable type.
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex
- // addresses instead.
- if (VD.isBlockByrefVariable())
- addType(VariableDie, getBlockByrefType(VD.getType(), Name));
- else
- addType(VariableDie, VD.getType());
+ addType(VariableDie, DV->getType());
}
- if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
+ if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
if (Scope->isAbstractScope()) {
@@ -1623,15 +1627,22 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
const MachineInstr *DVInsn = DVI->second;
const MCSymbol *DVLabel = findVariableLabel(DV);
bool updated = false;
- // FIXME : Handle getNumOperands != 3
+ // FIXME : Handle getNumOperands != 3
if (DVInsn->getNumOperands() == 3) {
- if (DVInsn->getOperand(0).isReg())
- updated =
- addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
+ if (DVInsn->getOperand(0).isReg()) {
+ const MachineOperand RegOp = DVInsn->getOperand(0);
+ const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+ if (DVInsn->getOperand(1).isImm() &&
+ TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+ addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm());
+ updated = true;
+ } else
+ updated = addRegisterAddress(VariableDie, DVLabel, RegOp);
+ }
else if (DVInsn->getOperand(0).isImm())
updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isFPImm())
- updated =
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated =
addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
} else {
MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
@@ -1651,24 +1662,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
}
DV->setDIE(VariableDie);
return VariableDie;
- }
+ }
// .. else use frame index, if available.
- MachineLocation Location;
- unsigned FrameReg;
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
int FI = 0;
- if (findVariableFrameIndex(DV, &FI)) {
- int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- Location.set(FrameReg, Offset);
-
- if (VD.hasComplexAddress())
- addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else if (VD.isBlockByrefVariable())
- addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else
- addAddress(VariableDie, dwarf::DW_AT_location, Location);
- }
+ if (findVariableFrameIndex(DV, &FI))
+ addVariableAddress(DV, VariableDie, FI);
+
DV->setDIE(VariableDie);
return VariableDie;
@@ -1677,7 +1677,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
void DwarfDebug::addPubTypes(DISubprogram SP) {
DICompositeType SPTy = SP.getType();
unsigned SPTag = SPTy.getTag();
- if (SPTag != dwarf::DW_TAG_subroutine_type)
+ if (SPTag != dwarf::DW_TAG_subroutine_type)
return;
DIArray Args = SPTy.getTypeArray();
@@ -1699,7 +1699,7 @@ void DwarfDebug::addPubTypes(DISubprogram SP) {
DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
-
+
DIScope DS(Scope->getScopeNode());
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
@@ -1718,9 +1718,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
else
ScopeDIE = constructLexicalScopeDIE(Scope);
if (!ScopeDIE) return NULL;
-
+
// Add variables to scope.
- const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
if (VariableDIE)
@@ -1736,9 +1736,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
ScopeDIE->addChild(NestedDIE);
}
- if (DS.isSubprogram())
+ if (DS.isSubprogram())
addPubTypes(DISubprogram(DS));
-
+
return ScopeDIE;
}
@@ -1748,6 +1748,8 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
/// maps as well.
unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
unsigned DId;
+ assert (DirName.empty() == false && "Invalid directory name!");
+
StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
if (DI != DirectoryIdMap.end()) {
DId = DI->getValue();
@@ -1789,12 +1791,12 @@ DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
TheCU->insertDIE(NS, NDie);
if (!NS.getName().empty())
addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
- addSourceLine(NDie, &NS);
+ addSourceLine(NDie, NS);
addToContextOwner(NDie, NS.getContext());
return NDie;
}
-/// constructCompileUnit - Create new CompileUnit for the given
+/// constructCompileUnit - Create new CompileUnit for the given
/// metadata node with tag DW_TAG_compile_unit.
void DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
@@ -1812,9 +1814,12 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
// simplifies debug range entries.
addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
   // DW_AT_stmt_list is an offset of line number information for this
- // compile unit in debug_line section. This offset is calculated
- // during endMoudle().
- addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ // compile unit in debug_line section.
+ if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList())
+ addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr,
+ Asm->GetTempSymbol("section_line"));
+ else
+ addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
if (!Dir.empty())
addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
@@ -1865,64 +1870,98 @@ CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
return I->second;
}
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+ DIDerivedType DTy(Ty);
+ if (DTy.Verify())
+ return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+ DIBasicType BTy(Ty);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char)
+ return true;
+ }
+ return false;
+}
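
isUnsignedDIType above recurses through derived types (typedefs, qualifiers) until it reaches a basic type, then tests its DWARF encoding. A self-contained model using the real encoding values (DW_ATE_unsigned is 0x07, DW_ATE_unsigned_char is 0x08):

    enum { DW_ATE_unsigned = 0x07, DW_ATE_unsigned_char = 0x08 };

    // Toy node: derived types chain to a base type; basic types carry an encoding.
    struct TypeNode {
      const TypeNode *DerivedFrom; // non-null for typedef/const/volatile/...
      unsigned Encoding;           // meaningful only for basic types
    };

    bool isUnsignedType(const TypeNode &Ty) {
      if (Ty.DerivedFrom)
        return isUnsignedType(*Ty.DerivedFrom); // walk to the underlying type
      return Ty.Encoding == DW_ATE_unsigned ||
             Ty.Encoding == DW_ATE_unsigned_char;
    }
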
/// constructGlobalVariableDIE - Construct global variable DIE.
void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
- DIGlobalVariable DI_GV(N);
+ DIGlobalVariable GV(N);
// If debug information is malformed then ignore it.
- if (DI_GV.Verify() == false)
+ if (GV.Verify() == false)
return;
// Check for pre-existence.
CompileUnit *TheCU = getCompileUnit(N);
- if (TheCU->getDIE(DI_GV))
+ if (TheCU->getDIE(GV))
return;
- DIE *VariableDie = createGlobalVariableDIE(DI_GV);
- if (!VariableDie)
- return;
-
- // Add to map.
- TheCU->insertDIE(N, VariableDie);
+ DIType GTy = GV.getType();
+ DIE *VariableDIE = new DIE(GV.getTag());
- // Add to context owner.
- DIDescriptor GVContext = DI_GV.getContext();
- // Do not create specification DIE if context is either compile unit
- // or a subprogram.
- if (DI_GV.isDefinition() && !GVContext.isCompileUnit() &&
- !GVContext.isFile() &&
- !isSubprogramContext(GVContext)) {
- // Create specification DIE.
- DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
- addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
- dwarf::DW_FORM_ref4, VariableDie);
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(DI_GV.getGlobal()));
- addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
- addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
- TheCU->addDie(VariableSpecDIE);
- } else {
- DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
- addLabel(Block, 0, dwarf::DW_FORM_udata,
- Asm->Mang->getSymbol(DI_GV.getGlobal()));
- addBlock(VariableDie, dwarf::DW_AT_location, 0, Block);
- }
- addToContextOwner(VariableDie, GVContext);
-
- // Expose as global. FIXME - need to check external flag.
- TheCU->addGlobal(DI_GV.getName(), VariableDie);
+ bool isGlobalVariable = GV.getGlobal() != NULL;
- DIType GTy = DI_GV.getType();
+ // Add name.
+ addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+ GV.getDisplayName());
+ StringRef LinkageName = GV.getLinkageName();
+ if (!LinkageName.empty() && isGlobalVariable)
+ addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+ getRealLinkageName(LinkageName));
+ // Add type.
+ addType(VariableDIE, GTy);
if (GTy.isCompositeType() && !GTy.getName().empty()
&& !GTy.isForwardDecl()) {
DIEEntry *Entry = TheCU->getDIEEntry(GTy);
assert(Entry && "Missing global type!");
TheCU->addGlobalType(GTy.getName(), Entry->getEntry());
}
+ // Add scoping info.
+ if (!GV.isLocalToUnit()) {
+ addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+ // Expose as global.
+ TheCU->addGlobal(GV.getName(), VariableDIE);
+ }
+ // Add line number info.
+ addSourceLine(VariableDIE, GV);
+ // Add to map.
+ TheCU->insertDIE(N, VariableDIE);
+ // Add to context owner.
+ DIDescriptor GVContext = GV.getContext();
+ addToContextOwner(VariableDIE, GVContext);
+ // Add location.
+ if (isGlobalVariable) {
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(GV.getGlobal()));
+ // Do not create specification DIE if context is either compile unit
+ // or a subprogram.
+ if (GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+ // Create specification DIE.
+ DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+ addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+ dwarf::DW_FORM_ref4, VariableDIE);
+ addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+ addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ TheCU->addDie(VariableSpecDIE);
+ } else {
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+ }
+ } else if (Constant *C = GV.getConstant()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ if (isUnsignedDIType(GTy))
+ addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ CI->getZExtValue());
+ else
+ addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+ CI->getSExtValue());
+ }
+ }
return;
}
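
The tail of the rewritten function gives a constant global without storage a DW_AT_const_value instead, choosing the unsigned form (getZExtValue/udata) or the signed form (getSExtValue/sdata) by the type's encoding. The signedness choice in isolation, as a sketch:

    #include <cstdint>
    #include <cstdio>

    // The same bit pattern is presented differently depending on the type.
    void emitConstValue(int32_t Bits, bool TypeIsUnsigned) {
      if (TypeIsUnsigned)
        std::printf("DW_AT_const_value (udata) = %llu\n",
                    (unsigned long long)(uint32_t)Bits); // zero-extended
      else
        std::printf("DW_AT_const_value (sdata) = %lld\n",
                    (long long)Bits);                    // sign-extended
    }

    int main() {
      emitConstValue(-1, false); // -1
      emitConstValue(-1, true);  // 4294967295
      return 0;
    }
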
@@ -1965,7 +2004,7 @@ void DwarfDebug::beginModule(Module *M) {
DbgFinder.processModule(*M);
bool HasDebugInfo = false;
-
+
// Scan all the compile-units to see if there are any marked as the main unit.
   // If not, we do not generate debug info.
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
@@ -1975,15 +2014,15 @@ void DwarfDebug::beginModule(Module *M) {
break;
}
}
-
+
if (!HasDebugInfo) return;
// Tell MMI that we have debug info.
MMI->setDebugInfoAvailability(true);
-
+
// Emit initial sections.
EmitSectionLabels();
-
+
// Create all the compile unit DIEs.
for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
E = DbgFinder.compile_unit_end(); I != E; ++I)
@@ -1999,6 +2038,11 @@ void DwarfDebug::beginModule(Module *M) {
E = DbgFinder.global_variable_end(); I != E; ++I)
constructGlobalVariableDIE(*I);
+  // Create type DIEs for the enums listed in the module's llvm.dbg.enum metadata.
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+
// Prime section data.
SectionMap.insert(Asm->getObjFileLowering().getTextSection());
@@ -2025,6 +2069,7 @@ void DwarfDebug::beginModule(Module *M) {
void DwarfDebug::endModule() {
if (!FirstCU) return;
const Module *M = MMI->getModule();
+ DenseMap<const MDNode *, DbgScope *> DeadFnScopeMap;
if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
@@ -2032,25 +2077,27 @@ void DwarfDebug::endModule() {
if (!SP.Verify()) continue;
// Collect info for variables that were optimized out.
+ if (!SP.isDefinition()) continue;
StringRef FName = SP.getLinkageName();
if (FName.empty())
FName = SP.getName();
- NamedMDNode *NMD =
+ NamedMDNode *NMD =
M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)));
if (!NMD) continue;
unsigned E = NMD->getNumOperands();
if (!E) continue;
DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
+ DeadFnScopeMap[SP] = Scope;
for (unsigned I = 0; I != E; ++I) {
DIVariable DV(NMD->getOperand(I));
if (!DV.Verify()) continue;
Scope->addVariable(new DbgVariable(DV));
}
-
+
// Construct subprogram DIE and add variables DIEs.
constructSubprogramDIE(SP);
DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
- const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
if (VariableDIE)
@@ -2099,15 +2146,15 @@ void DwarfDebug::endModule() {
// Compute DIE offsets and sizes.
computeSizeAndOffsets();
- // Emit source line correspondence into a debug line section.
- emitDebugLines();
-
// Emit all the DIEs into a debug info section
emitDebugInfo();
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
+ // Emit source line correspondence into a debug line section.
+ emitDebugLines();
+
// Emit info into a debug pubnames section.
emitDebugPubNames();
@@ -2131,7 +2178,9 @@ void DwarfDebug::endModule() {
// Emit info into a debug str section.
emitDebugStr();
-
+
+  // Clean up the DbgScopes created above for dead (optimized-out) functions.
+ DeleteContainerSeconds(DeadFnScopeMap);
for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
E = CUMap.end(); I != E; ++I)
delete I->second;
@@ -2139,7 +2188,7 @@ void DwarfDebug::endModule() {
}
/// findAbstractVariable - Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
DebugLoc ScopeLoc) {
DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
@@ -2159,7 +2208,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
/// collectVariableInfoFromMMITable - Collect variable information from
/// side table maintained by MMI.
-void
+void
DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
@@ -2177,7 +2226,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
Scope = ConcreteScopes.lookup(IA);
if (Scope == 0)
Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
-
+
// If variable scope is not found then skip this variable.
if (Scope == 0)
continue;
@@ -2193,7 +2242,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
}
}
-/// isDbgValueInUndefinedReg - Return true if debug value, encoded by
+/// isDbgValueInUndefinedReg - Return true if debug value, encoded by
/// DBG_VALUE instruction, is in undefined reg.
static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
@@ -2202,7 +2251,7 @@ static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
return false;
}
-/// isDbgValueInDefinedReg - Return true if debug value, encoded by
+/// isDbgValueInDefinedReg - Return true if debug value, encoded by
/// DBG_VALUE instruction, is in a defined reg.
static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
@@ -2212,10 +2261,10 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
}
/// collectVariableInfo - Populate DbgScope entries with variables' info.
-void
+void
DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
-
+
   /// Collect variable info from the side table maintained by MMI.
collectVariableInfoFromMMITable(MF, Processed);
@@ -2244,11 +2293,11 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
continue;
const MachineInstr *PrevMI = MInsn;
- for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
+ for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
ME = DbgValues.end(); MI != ME; ++MI) {
- const MDNode *Var =
+ const MDNode *Var =
(*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
- if (Var == DV && isDbgValueInDefinedReg(*MI) &&
+ if (Var == DV && isDbgValueInDefinedReg(*MI) &&
!PrevMI->isIdenticalTo(*MI))
MultipleValues.push_back(*MI);
PrevMI = *MI;
@@ -2269,7 +2318,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
DbgVariable *RegVar = new DbgVariable(DV);
Scope->addVariable(RegVar);
if (!CurFnArg)
- DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
+ DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
DbgVariableToDbgInstMap[AbsVar] = MInsn;
VarToAbstractVarMap[RegVar] = AbsVar;
@@ -2286,26 +2335,39 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
const MachineInstr *Begin = NULL;
const MachineInstr *End = NULL;
- for (SmallVector<const MachineInstr *, 4>::iterator
- MVI = MultipleValues.begin(), MVE = MultipleValues.end();
+ for (SmallVector<const MachineInstr *, 4>::iterator
+ MVI = MultipleValues.begin(), MVE = MultipleValues.end();
MVI != MVE; ++MVI) {
if (!Begin) {
Begin = *MVI;
continue;
- }
+ }
End = *MVI;
MachineLocation MLoc;
- MLoc.set(Begin->getOperand(0).getReg(), 0);
+ if (Begin->getNumOperands() == 3) {
+ if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm())
+ MLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm());
+ } else
+ MLoc = Asm->getDebugValueLocation(Begin);
+
const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
const MCSymbol *SLabel = getLabelBeforeInsn(End);
- DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+ if (MLoc.getReg())
+ DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+
Begin = End;
if (MVI + 1 == MVE) {
// If End is the last instruction then its value is valid
         // until the end of the function.
- MLoc.set(End->getOperand(0).getReg(), 0);
- DotDebugLocEntries.
- push_back(DotDebugLocEntry(SLabel, FunctionEndSym, MLoc));
+ MachineLocation EMLoc;
+ if (End->getNumOperands() == 3) {
+          if (End->getOperand(0).isReg() && End->getOperand(1).isImm())
+            EMLoc.set(End->getOperand(0).getReg(), End->getOperand(1).getImm());
+ } else
+ EMLoc = Asm->getDebugValueLocation(End);
+ if (EMLoc.getReg())
+ DotDebugLocEntries.
+ push_back(DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc));
}
}
DotDebugLocEntries.push_back(DotDebugLocEntry());
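
The loop above turns the N DBG_VALUEs collected for one variable into N half-open ranges: each value holds from its own label until the next value's label, and the last holds until the function end. The pairing, modeled stand-alone (labels and locations reduced to strings):

    #include <string>
    #include <utility>
    #include <vector>

    struct LocEntry { std::string Begin, End, Loc; };

    // Samples are (label, location) pairs in program order.
    std::vector<LocEntry>
    buildLocList(const std::vector<std::pair<std::string, std::string> > &Samples,
                 const std::string &FuncEnd) {
      std::vector<LocEntry> Out;
      for (unsigned i = 0; i + 1 < Samples.size(); ++i) {
        LocEntry E = { Samples[i].first, Samples[i + 1].first, Samples[i].second };
        Out.push_back(E);
      }
      if (!Samples.empty()) { // final value stays live to the end of the function
        LocEntry Last = { Samples.back().first, FuncEnd, Samples.back().second };
        Out.push_back(Last);
      }
      return Out;
    }
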
@@ -2314,11 +2376,11 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
// Collect info for variables that were optimized out.
const Function *F = MF->getFunction();
const Module *M = F->getParent();
- if (NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.",
+ if (NamedMDNode *NMD =
+ M->getNamedMetadata(Twine("llvm.dbg.lv.",
getRealLinkageName(F->getName())))) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+ DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
if (!DV || !Processed.insert(DV))
continue;
DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
@@ -2364,7 +2426,7 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {
return;
}
- // If location is unknown then use temp label for this DBG_VALUE
+ // If location is unknown then use temp label for this DBG_VALUE
// instruction.
if (MI->isDebugValue()) {
PrevLabel = MMI->getContext().CreateTempSymbol();
@@ -2393,7 +2455,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
}
/// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
+DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
const MDNode *InlinedAt) {
if (!InlinedAt) {
DbgScope *WScope = DbgScopeMap.lookup(Scope);
@@ -2402,7 +2464,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
DbgScopeMap.insert(std::make_pair(Scope, WScope));
if (DIDescriptor(Scope).isLexicalBlock()) {
- DbgScope *Parent =
+ DbgScope *Parent =
getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
WScope->setParent(Parent);
Parent->addScope(WScope);
@@ -2419,7 +2481,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
CurrentFnDbgScope = WScope;
}
-
+
return WScope;
}
@@ -2448,14 +2510,14 @@ static bool hasValidLocation(LLVMContext &Ctx,
const MDNode *&Scope, const MDNode *&InlinedAt) {
DebugLoc DL = MInsn->getDebugLoc();
if (DL.isUnknown()) return false;
-
+
const MDNode *S = DL.getScope(Ctx);
-
+
// There is no need to create another DIE for the compile unit. For all
// other scopes, create one DbgScope now. This will be translated
// into a scope DIE at the end.
if (DIScope(S).isCompileUnit()) return false;
-
+
Scope = S;
InlinedAt = DL.getInlinedAt(Ctx);
return true;
@@ -2490,7 +2552,7 @@ static void calculateDominanceGraph(DbgScope *Scope) {
}
/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
-static
+static
void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
{
@@ -2507,9 +2569,9 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
// Check if instruction has valid location information.
if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
dbgs() << " [ ";
- if (InlinedAt)
+ if (InlinedAt)
dbgs() << "*";
- DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
+ DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
MI2ScopeMap.find(MInsn);
if (DI != MI2ScopeMap.end()) {
DbgScope *S = DI->second;
@@ -2517,7 +2579,7 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
PrevDFSIn = S->getDFSIn();
} else
dbgs() << PrevDFSIn;
- } else
+ } else
dbgs() << " [ x" << PrevDFSIn;
dbgs() << " ]";
MInsn->dump();
@@ -2555,26 +2617,26 @@ bool DwarfDebug::extractScopeInformation() {
PrevMI = MInsn;
continue;
}
-
+
// If scope has not changed then skip this instruction.
if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
PrevMI = MInsn;
continue;
}
- if (RangeBeginMI) {
- // If we have alread seen a beginning of a instruction range and
+ if (RangeBeginMI) {
+ // If we have already seen the beginning of an instruction range and
// current instruction scope does not match scope of first instruction
// in this range then create a new instruction range.
DbgRange R(RangeBeginMI, PrevMI);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
PrevInlinedAt);
MIRanges.push_back(R);
- }
+ }
// This is a beginning of a new instruction range.
RangeBeginMI = MInsn;
-
+
// Reset previous markers.
PrevMI = MInsn;
PrevScope = Scope;
@@ -2588,7 +2650,7 @@ bool DwarfDebug::extractScopeInformation() {
MIRanges.push_back(R);
MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
}
-
+
if (!CurrentFnDbgScope)
return false;
@@ -2618,7 +2680,7 @@ bool DwarfDebug::extractScopeInformation() {
return !DbgScopeMap.empty();
}
-/// identifyScopeMarkers() -
+/// identifyScopeMarkers() -
/// Each DbgScope has first instruction and last instruction to mark beginning
/// and end of a scope respectively. Create an inverse map that lists the scopes
/// that start (and end) with an instruction. One instruction may start (or end)
@@ -2628,23 +2690,23 @@ void DwarfDebug::identifyScopeMarkers() {
WorkList.push_back(CurrentFnDbgScope);
while (!WorkList.empty()) {
DbgScope *S = WorkList.pop_back_val();
-
+
const SmallVector<DbgScope *, 4> &Children = S->getScopes();
- if (!Children.empty())
+ if (!Children.empty())
for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
SE = Children.end(); SI != SE; ++SI)
WorkList.push_back(*SI);
if (S->isAbstractScope())
continue;
-
+
const SmallVector<DbgRange, 4> &Ranges = S->getRanges();
if (Ranges.empty())
continue;
for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
- assert(RI->first && "DbgRange does not have first instruction!");
- assert(RI->second && "DbgRange does not have second instruction!");
+ assert(RI->first && "DbgRange does not have first instruction!");
+ assert(RI->second && "DbgRange does not have second instruction!");
InsnsEndScopeSet.insert(RI->second);
}
}
@@ -2680,20 +2742,23 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// function.
DebugLoc FDL = FindFirstDebugLoc(MF);
if (FDL.isUnknown()) return;
-
+
const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
-
+ const MDNode *TheScope = 0;
+
DISubprogram SP = getDISubprogram(Scope);
unsigned Line, Col;
if (SP.Verify()) {
Line = SP.getLineNumber();
Col = 0;
+ TheScope = SP;
} else {
Line = FDL.getLine();
Col = FDL.getCol();
+ TheScope = Scope;
}
-
- recordSourceLine(Line, Col, Scope);
+
+ recordSourceLine(Line, Col, TheScope);
/// ProcessedArgs - Collection of arguments already processed.
SmallPtrSet<const MDNode *, 8> ProcessedArgs;
@@ -2710,7 +2775,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
if (!DV.Verify()) continue;
// If DBG_VALUE is for a local variable then it needs a label.
- if (DV.getTag() != dwarf::DW_TAG_arg_variable
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable
&& isDbgValueInUndefinedReg(MI) == false)
InsnNeedsLabel.insert(MI);
// DBG_VALUE for an inlined function's argument needs a label.
@@ -2718,10 +2783,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
describes(MF->getFunction()))
InsnNeedsLabel.insert(MI);
// DBG_VALUE indicating argument location change needs a label.
- else if (isDbgValueInUndefinedReg(MI) == false && !ProcessedArgs.insert(DV))
+ else if (isDbgValueInUndefinedReg(MI) == false
+ && !ProcessedArgs.insert(DV))
InsnNeedsLabel.insert(MI);
} else {
- // If location is unknown then instruction needs a location only if
+ // If location is unknown then instruction needs a location only if
// UnknownLocations flag is set.
if (DL.isUnknown()) {
if (UnknownLocations && !PrevLoc.isUnknown())
@@ -2730,7 +2796,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Otherwise, the instruction needs a label only if it is at a new location.
InsnNeedsLabel.insert(MI);
}
-
+
if (!DL.isUnknown() || UnknownLocations)
PrevLoc = DL;
}
@@ -2750,7 +2816,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionEndSym);
-
+
SmallPtrSet<const MDNode *, 16> ProcessedVars;
collectVariableInfo(MF, ProcessedVars);
@@ -2764,7 +2830,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
SectionLineInfos.insert(SectionLineInfos.end(),
Lines.begin(), Lines.end());
}
-
+
// Construct abstract scopes.
for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
AE = AbstractScopesList.end(); AI != AE; ++AI) {
@@ -2775,11 +2841,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
if (FName.empty())
FName = SP.getName();
const Module *M = MF->getFunction()->getParent();
- if (NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.",
+ if (NamedMDNode *NMD =
+ M->getNamedMetadata(Twine("llvm.dbg.lv.",
getRealLinkageName(FName)))) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+ DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
if (!DV || !ProcessedVars.insert(DV))
continue;
DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
@@ -2793,9 +2859,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
}
DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
-
+
if (!DisableFramePointerElim(*MF))
- addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
+ addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
dwarf::DW_FORM_flag, 1);
@@ -2849,22 +2915,22 @@ const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) {
else return I->second;
}
-/// findDbgScope - Find DbgScope for the debug loc attached with an
+/// findDbgScope - Find DbgScope for the debug loc attached with an
/// instruction.
DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
DbgScope *Scope = NULL;
- LLVMContext &Ctx =
+ LLVMContext &Ctx =
MInsn->getParent()->getParent()->getFunction()->getContext();
DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown())
+ if (DL.isUnknown())
return Scope;
if (const MDNode *IA = DL.getInlinedAt(Ctx))
Scope = ConcreteScopes.lookup(IA);
if (Scope == 0)
Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-
+
return Scope;
}
@@ -2872,7 +2938,7 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
const MDNode *S) {
StringRef Dir;
StringRef Fn;
@@ -2899,16 +2965,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
Src = GetOrCreateSourceID(Dir, Fn);
}
-#if 0
- if (!Lines.empty()) {
- SrcLineInfo lastSrcLineInfo = Lines.back();
- // Emitting sequential line records with the same line number (but
- // different addresses) seems to confuse GDB. Avoid this.
- if (lastSrcLineInfo.getLine() == Line)
- return NULL;
- }
-#endif
-
MCSymbol *Label = MMI->getContext().CreateTempSymbol();
Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
@@ -2991,7 +3047,7 @@ static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
const char *SymbolStem = 0) {
Asm->OutStreamer.SwitchSection(Section);
if (!SymbolStem) return 0;
-
+
MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
Asm->OutStreamer.EmitLabel(TmpSym);
return TmpSym;
@@ -3008,21 +3064,20 @@ void DwarfDebug::EmitSectionLabels() {
EmitSectionSym(Asm, TLOF.getDwarfFrameSection(), "section_debug_frame");
}
- DwarfInfoSectionSym =
+ DwarfInfoSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
- DwarfAbbrevSectionSym =
+ DwarfAbbrevSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
-
+
if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
EmitSectionSym(Asm, MacroInfo);
- DwarfDebugLineSectionSym =
- EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+ EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
EmitSectionSym(Asm, TLOF.getDwarfLocSection());
EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
- DwarfStrSectionSym =
+ DwarfStrSectionSym =
EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
"debug_range");
@@ -3060,7 +3115,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
if (Asm->isVerbose())
Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
-
+
switch (Attr) {
case dwarf::DW_AT_sibling:
Asm->EmitInt32(Die->getSiblingOffset());
@@ -3075,15 +3130,17 @@ void DwarfDebug::emitDIE(DIE *Die) {
case dwarf::DW_AT_ranges: {
// DW_AT_range Value encodes offset in debug_range section.
DIEInteger *V = cast<DIEInteger>(Values[i]);
- Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
- V->getValue(),
- DwarfDebugRangeSectionSym,
- 4);
- break;
- }
- case dwarf::DW_AT_stmt_list: {
- Asm->EmitLabelDifference(CurrentLineSectionSym,
- DwarfDebugLineSectionSym, 4);
+
+ if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+ Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ 4);
+ } else {
+ Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+ V->getValue(),
+ DwarfDebugRangeSectionSym,
+ 4);
+ }
break;
}
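The DW_AT_ranges case above now asks MAI->doesDwarfUsesLabelOffsetForRanges(): targets whose assemblers fold label+offset expressions emit that form directly, while the rest keep a self-cancelling difference that evaluates to the same offset but stays within the expression forms those assemblers accept. A sketch of the two calls with a hypothetical offset, assuming EmitLabelOffsetDifference emits (Hi + Offset) - Lo:

    uint64_t Off = V->getValue(); // e.g. 0x40 into .debug_ranges
    // label-plus-offset form, roughly:  .long section_ranges + 0x40
    Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym, Off, 4);
    // difference form, roughly:  .long (section_ranges + 0x40) - section_ranges
    Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym, Off,
                                   DwarfDebugRangeSectionSym, 4);

Both spell a 4-byte value of 0x40; only the relocation story differs.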
case dwarf::DW_AT_location: {
@@ -3124,18 +3181,18 @@ void DwarfDebug::emitDebugInfo() {
E = CUMap.end(); I != E; ++I) {
CompileUnit *TheCU = I->second;
DIE *Die = TheCU->getCUDie();
-
+
// Emit the compile units header.
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
TheCU->getID()));
-
+
// Emit size of content not including length itself
unsigned ContentSize = Die->getSize() +
sizeof(int16_t) + // DWARF version number
sizeof(int32_t) + // Offset Into Abbrev. Section
sizeof(int8_t) + // Pointer Size (in bytes)
sizeof(int32_t); // FIXME - extra pad for gdb bug.
-
+
Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
Asm->EmitInt32(ContentSize);
Asm->OutStreamer.AddComment("DWARF version number");
@@ -3145,7 +3202,7 @@ void DwarfDebug::emitDebugInfo() {
DwarfAbbrevSectionSym);
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getTargetData().getPointerSize());
-
+
emitDIE(Die);
// FIXME - extra padding for gdb bug.
Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
@@ -3194,7 +3251,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
// Define last address of section.
Asm->OutStreamer.AddComment("Extended Op");
Asm->EmitInt8(0);
-
+
Asm->OutStreamer.AddComment("Op size");
Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
Asm->OutStreamer.AddComment("DW_LNE_set_address");
@@ -3231,15 +3288,13 @@ void DwarfDebug::emitDebugLines() {
Asm->getObjFileLowering().getDwarfLineSection());
// Construct the section header.
- CurrentLineSectionSym = Asm->GetTempSymbol("section_line_begin");
- Asm->OutStreamer.EmitLabel(CurrentLineSectionSym);
Asm->OutStreamer.AddComment("Length of Source Line Info");
Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
Asm->GetTempSymbol("line_begin"), 4);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin"));
Asm->OutStreamer.AddComment("DWARF version number");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
Asm->OutStreamer.AddComment("Prolog Length");
Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"),
@@ -3294,7 +3349,7 @@ void DwarfDebug::emitDebugLines() {
const std::string &FN = getSourceFileName(Id.second);
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source");
Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0);
-
+
Asm->EmitULEB128(Id.first, "Directory #");
Asm->EmitULEB128(0, "Mod date");
Asm->EmitULEB128(0, "File size");
@@ -3338,18 +3393,18 @@ void DwarfDebug::emitDebugLines() {
Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
Asm->OutStreamer.AddComment("DW_LNE_set_address");
- Asm->EmitInt8(dwarf::DW_LNE_set_address);
+ Asm->EmitInt8(dwarf::DW_LNE_set_address);
Asm->OutStreamer.AddComment("Location label");
Asm->OutStreamer.EmitSymbolValue(Label,
Asm->getTargetData().getPointerSize(),
0/*AddrSpace*/);
-
+
// If change of source, then switch to the new source.
if (Source != LineInfo.getSourceID()) {
Source = LineInfo.getSourceID();
Asm->OutStreamer.AddComment("DW_LNS_set_file");
- Asm->EmitInt8(dwarf::DW_LNS_set_file);
+ Asm->EmitInt8(dwarf::DW_LNS_set_file);
Asm->EmitULEB128(Source, "New Source");
}
@@ -3457,7 +3512,7 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
Asm->OutStreamer.EmitLabel(DebugFrameBegin);
Asm->OutStreamer.AddComment("FDE CIE offset");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"),
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"),
DwarfFrameSectionSym);
Asm->OutStreamer.AddComment("FDE initial location");
@@ -3466,8 +3521,8 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
Asm->OutStreamer.EmitSymbolValue(FuncBeginSym,
Asm->getTargetData().getPointerSize(),
0/*AddrSpace*/);
-
-
+
+
Asm->OutStreamer.AddComment("FDE address range");
Asm->EmitLabelDifference(Asm->GetTempSymbol("func_end",DebugFrameInfo.Number),
FuncBeginSym, Asm->getTargetData().getPointerSize());
@@ -3487,41 +3542,41 @@ void DwarfDebug::emitDebugPubNames() {
// Start the dwarf pubnames section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfPubNamesSection());
-
+
Asm->OutStreamer.AddComment("Length of Public Names Info");
Asm->EmitLabelDifference(
Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
-
+
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
TheCU->getID()));
-
+
Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
-
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
DwarfInfoSectionSym);
-
+
Asm->OutStreamer.AddComment("Compilation Unit Length");
Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
Asm->GetTempSymbol("info_begin", TheCU->getID()),
4);
-
+
const StringMap<DIE*> &Globals = TheCU->getGlobals();
for (StringMap<DIE*>::const_iterator
GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
DIE *Entity = GI->second;
-
+
Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(Entity->getOffset());
-
+
if (Asm->isVerbose())
Asm->OutStreamer.AddComment("External Name");
Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
}
-
+
Asm->OutStreamer.AddComment("End Mark");
Asm->EmitInt32(0);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
@@ -3540,37 +3595,37 @@ void DwarfDebug::emitDebugPubTypes() {
Asm->EmitLabelDifference(
Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
-
+
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
TheCU->getID()));
-
+
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
Asm->EmitInt16(dwarf::DWARF_VERSION);
-
+
Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
DwarfInfoSectionSym);
-
+
Asm->OutStreamer.AddComment("Compilation Unit Length");
Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
Asm->GetTempSymbol("info_begin", TheCU->getID()),
4);
-
+
const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
for (StringMap<DIE*>::const_iterator
GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
DIE * Entity = GI->second;
-
+
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
Asm->EmitInt32(Entity->getOffset());
-
+
if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
}
-
+
Asm->OutStreamer.AddComment("End Mark");
- Asm->EmitInt32(0);
+ Asm->EmitInt32(0);
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
TheCU->getID()));
}
@@ -3581,26 +3636,26 @@ void DwarfDebug::emitDebugPubTypes() {
void DwarfDebug::emitDebugStr() {
// Check to see if it is worth the effort.
if (StringPool.empty()) return;
-
+
// Start the dwarf str section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfStrSection());
// Get all of the string pool entries and put them in an array by their ID so
// we can sort them.
- SmallVector<std::pair<unsigned,
+ SmallVector<std::pair<unsigned,
StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
-
+
for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
Entries.push_back(std::make_pair(I->second.second, &*I));
-
+
array_pod_sort(Entries.begin(), Entries.end());
-
+
for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
// Emit a label for reference from debug information entries.
Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
-
+
// Emit the string itself.
Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
}
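Pairing each pool entry with its ID and running array_pod_sort restores emission order: std::pair compares lexicographically, so the unsigned ID dominates and the StringMap's hash order drops out. A standalone illustration with hypothetical data:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    void sortDemo() {
      SmallVector<std::pair<unsigned, const char *>, 4> Entries;
      Entries.push_back(std::make_pair(2, "str2")); // inserted in hash order...
      Entries.push_back(std::make_pair(0, "str0"));
      Entries.push_back(std::make_pair(1, "str1"));
      array_pod_sort(Entries.begin(), Entries.end());
      // ...comes out as (0,"str0"), (1,"str1"), (2,"str2").
    }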
@@ -3618,8 +3673,8 @@ void DwarfDebug::emitDebugLoc() {
unsigned char Size = Asm->getTargetData().getPointerSize();
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
- for (SmallVector<DotDebugLocEntry, 4>::iterator
- I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I, ++index) {
DotDebugLocEntry Entry = *I;
if (Entry.isEmpty()) {
@@ -3631,15 +3686,30 @@ void DwarfDebug::emitDebugLoc() {
Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false);
- if (Reg < 32) {
+ if (int Offset = Entry.Loc.getOffset()) {
+ // If the value is at a certain offset from frame register then
+ // use DW_OP_fbreg.
+ unsigned OffsetSize = Offset ? MCAsmInfo::getSLEB128Size(Offset) : 1;
Asm->OutStreamer.AddComment("Loc expr size");
- Asm->EmitInt16(1);
- Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ Asm->EmitInt16(1 + OffsetSize);
+ Asm->OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_fbreg));
+ Asm->EmitInt8(dwarf::DW_OP_fbreg);
+ Asm->OutStreamer.AddComment("Offset");
+ Asm->EmitSLEB128(Offset);
} else {
- Asm->OutStreamer.AddComment("Loc expr size");
- Asm->EmitInt16(1+MCAsmInfo::getULEB128Size(Reg));
- Asm->EmitInt8(dwarf::DW_OP_regx);
- Asm->EmitULEB128(Reg);
+ if (Reg < 32) {
+ Asm->OutStreamer.AddComment("Loc expr size");
+ Asm->EmitInt16(1);
+ Asm->OutStreamer.AddComment(
+ dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+ Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ } else {
+ Asm->OutStreamer.AddComment("Loc expr size");
+ Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg));
+ Asm->EmitInt8(dwarf::DW_OP_regx);
+ Asm->EmitULEB128(Reg);
+ }
}
}
}
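Worked encodings for the three branches above. With Offset = -120, MCAsmInfo::getSLEB128Size(-120) is 2, so the entry emits length 3 followed by DW_OP_fbreg and the SLEB128 bytes 0x88 0x7f. With no offset and DWARF register 3, the register fits DW_OP_reg0..DW_OP_reg31, so the entry is a single byte, DW_OP_reg3, with length 1. With no offset and DWARF register 40, the regx form applies: length 1 + getULEB128Size(40) = 2, then DW_OP_regx and the ULEB128 byte 0x28.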
@@ -3661,7 +3731,7 @@ void DwarfDebug::emitDebugRanges() {
Asm->getObjFileLowering().getDwarfRangesSection());
unsigned char Size = Asm->getTargetData().getPointerSize();
for (SmallVector<const MCSymbol *, 8>::iterator
- I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+ I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
I != E; ++I) {
if (*I)
Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
@@ -3734,7 +3804,7 @@ void DwarfDebug::emitDebugInlineInfo() {
if (LName.empty()) {
Asm->OutStreamer.EmitBytes(Name, 0);
Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
- } else
+ } else
Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
DwarfStrSectionSym);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 5a281c8517481..f0ff3bc71699a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -261,7 +261,6 @@ class DwarfDebug {
MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
MCSymbol *DwarfDebugLocSectionSym;
- MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
DIEInteger *DIEIntegerOne;
@@ -338,11 +337,11 @@ private:
/// addSourceLine - Add location information to specified debug information
/// entry.
- void addSourceLine(DIE *Die, const DIVariable *V);
- void addSourceLine(DIE *Die, const DIGlobalVariable *G);
- void addSourceLine(DIE *Die, const DISubprogram *SP);
- void addSourceLine(DIE *Die, const DIType *Ty);
- void addSourceLine(DIE *Die, const DINameSpace *NS);
+ void addSourceLine(DIE *Die, DIVariable V);
+ void addSourceLine(DIE *Die, DIGlobalVariable G);
+ void addSourceLine(DIE *Die, DISubprogram SP);
+ void addSourceLine(DIE *Die, DIType Ty);
+ void addSourceLine(DIE *Die, DINameSpace NS);
/// addAddress - Add an address attribute to a die based on the location
/// provided.
@@ -376,6 +375,10 @@ private:
void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
const MachineLocation &Location);
+ /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based
+ /// on provided frame index.
+ void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI);
+
/// addToContextOwner - Add Die into the list of its context owner's children.
void addToContextOwner(DIE *Die, DIDescriptor Context);
@@ -414,14 +417,11 @@ private:
/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
DIE *constructEnumTypeDIE(DIEnumerator ETy);
- /// createGlobalVariableDIE - Create new DIE using GV.
- DIE *createGlobalVariableDIE(const DIGlobalVariable &GV);
-
/// createMemberDIE - Create new member DIE.
- DIE *createMemberDIE(const DIDerivedType &DT);
+ DIE *createMemberDIE(DIDerivedType DT);
/// createSubprogramDIE - Create new DIE using SP.
- DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
+ DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false);
/// getOrCreateDbgScope - Create DbgScope for the scope.
DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
@@ -560,12 +560,6 @@ private:
/// construct SubprogramDIE - Construct subprogram DIE.
void constructSubprogramDIE(const MDNode *N);
- // FIXME: This should go away in favor of complex addresses.
- /// Find the type the programmer originally declared the variable to be
- /// and return that type. Obsolete, use GetComplexAddrType instead.
- ///
- DIType getBlockByrefType(DIType Ty, std::string Name);
-
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index c87284083cded..86a368831e0e2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -894,7 +894,7 @@ void DwarfException::EndModule() {
if (!shouldEmitMovesModule && !shouldEmitTableModule)
return;
- const std::vector<const Function *> Personalities = MMI->getPersonalities();
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
EmitCIE(Personalities[I], I);
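Binding Personalities as a const reference avoids deep-copying the vector every time EndModule runs; the loop only reads it. The pattern in isolation (the accessor is hypothetical):

    #include <vector>
    const std::vector<int> &getItems(); // stand-in for MMI->getPersonalities()

    void before() { const std::vector<int> Items = getItems(); }  // deep copy
    void after()  { const std::vector<int> &Items = getItems(); } // no copy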
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 7f98df0d22ea4..cb81aa3c88ce6 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -65,7 +65,7 @@ namespace {
public:
static char ID;
explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
+ : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "Control Flow Optimizer"; }
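This is one instance of a change applied throughout this commit: a pass now hands its static ID char to the base-class constructor directly, which takes it by reference and stores its address, instead of the pass passing &ID itself. A simplified model of the pattern (class names made up; the real bases live in include/llvm/Pass.h):

    struct PassBase {
      explicit PassBase(char &PID) : PassID(&PID) {}
      const char *PassID; // the char's address is the pass's unique identity
    };

    struct MyPass : PassBase {
      static char ID;
      MyPass() : PassBase(ID) {} // previously: PassBase(&ID)
    };
    char MyPass::ID = 0;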
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index ffeff1ee27a64..2ef115dbd2056 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen
LiveIntervalAnalysis.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
+ LocalStackSlotAllocation.cpp
LowerSubregs.cpp
MachineBasicBlock.cpp
MachineCSE.cpp
@@ -42,10 +43,10 @@ add_llvm_library(LLVMCodeGen
MachineVerifier.cpp
ObjectCodeEmitter.cpp
OcamlGC.cpp
- OptimizeExts.cpp
OptimizePHIs.cpp
PHIElimination.cpp
Passes.cpp
+ PeepholeOptimizer.cpp
PostRAHazardRecognizer.cpp
PostRASchedulerList.cpp
PreAllocSplitting.cpp
@@ -57,6 +58,7 @@ add_llvm_library(LLVMCodeGen
RegAllocPBQP.cpp
RegisterCoalescer.cpp
RegisterScavenging.cpp
+ RenderMachineFunction.cpp
ScheduleDAG.cpp
ScheduleDAGEmit.cpp
ScheduleDAGInstrs.cpp
@@ -67,6 +69,8 @@ add_llvm_library(LLVMCodeGen
SjLjEHPrepare.cpp
SlotIndexes.cpp
Spiller.cpp
+ SplitKit.cpp
+ Splitter.cpp
StackProtector.cpp
StackSlotColoring.cpp
StrongPHIElimination.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 240a7b94fccfd..1b7e08a8b6bb8 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -25,8 +25,8 @@
using namespace llvm;
char CalculateSpillWeights::ID = 0;
-static RegisterPass<CalculateSpillWeights> X("calcspillweights",
- "Calculate spill weights");
+INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false);
void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<LiveIntervals>();
@@ -41,108 +41,184 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
<< "********** Function: "
<< fn.getFunction()->getName() << '\n');
- LiveIntervals *lis = &getAnalysis<LiveIntervals>();
- MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>();
- const TargetInstrInfo *tii = fn.getTarget().getInstrInfo();
- MachineRegisterInfo *mri = &fn.getRegInfo();
-
- SmallSet<unsigned, 4> processed;
- for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end();
- mbbi != mbbe; ++mbbi) {
- MachineBasicBlock* mbb = mbbi;
- SlotIndex mbbEnd = lis->getMBBEndIdx(mbb);
- MachineLoop* loop = loopInfo->getLoopFor(mbb);
- unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
- bool isExiting = loop ? loop->isLoopExiting(mbb) : false;
-
- for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end();
- mii != mie; ++mii) {
- const MachineInstr *mi = mii;
- if (tii->isIdentityCopy(*mi) || mi->isImplicitDef() || mi->isDebugValue())
- continue;
-
- for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
- const MachineOperand &mopi = mi->getOperand(i);
- if (!mopi.isReg() || mopi.getReg() == 0)
- continue;
- unsigned reg = mopi.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
- continue;
- // Multiple uses of reg by the same instruction. It should not
- // contribute to spill weight again.
- if (!processed.insert(reg))
- continue;
-
- bool hasDef = mopi.isDef();
- bool hasUse = !hasDef;
- for (unsigned j = i+1; j != e; ++j) {
- const MachineOperand &mopj = mi->getOperand(j);
- if (!mopj.isReg() || mopj.getReg() != reg)
- continue;
- hasDef |= mopj.isDef();
- hasUse |= mopj.isUse();
- if (hasDef && hasUse)
- break;
- }
-
- LiveInterval &regInt = lis->getInterval(reg);
- float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth);
- if (hasDef && isExiting) {
- // Looks like this is a loop count variable update.
- SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex();
- const LiveRange *dlr =
- lis->getInterval(reg).getLiveRangeContaining(defIdx);
- if (dlr->end >= mbbEnd)
- weight *= 3.0F;
- }
- regInt.weight += weight;
- }
- processed.clear();
- }
+ LiveIntervals &lis = getAnalysis<LiveIntervals>();
+ VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
+ for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
+ LiveInterval &li = *I->second;
+ if (TargetRegisterInfo::isVirtualRegister(li.reg))
+ vrai.CalculateWeightAndHint(li);
+ }
+ return false;
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+ const TargetRegisterInfo &tri,
+ const MachineRegisterInfo &mri) {
+ unsigned sub, hreg, hsub;
+ if (mi->getOperand(0).getReg() == reg) {
+ sub = mi->getOperand(0).getSubReg();
+ hreg = mi->getOperand(1).getReg();
+ hsub = mi->getOperand(1).getSubReg();
+ } else {
+ sub = mi->getOperand(1).getSubReg();
+ hreg = mi->getOperand(0).getReg();
+ hsub = mi->getOperand(0).getSubReg();
}
- for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) {
- LiveInterval &li = *I->second;
- if (TargetRegisterInfo::isVirtualRegister(li.reg)) {
- // If the live interval length is essentially zero, i.e. in every live
- // range the use follows def immediately, it doesn't make sense to spill
- // it and hope it will be easier to allocate for this li.
- if (isZeroLengthInterval(&li)) {
- li.weight = HUGE_VALF;
- continue;
- }
-
- bool isLoad = false;
- SmallVector<LiveInterval*, 4> spillIs;
- if (lis->isReMaterializable(li, spillIs, isLoad)) {
- // If all of the definitions of the interval are re-materializable,
- // it is a preferred candidate for spilling. If none of the defs are
- // loads, then it's potentially very cheap to re-materialize.
- // FIXME: this gets much more complicated once we support non-trivial
- // re-materialization.
- if (isLoad)
- li.weight *= 0.9F;
- else
- li.weight *= 0.5F;
- }
-
- // Slightly prefer live interval that has been assigned a preferred reg.
- std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg);
- if (Hint.first || Hint.second)
- li.weight *= 1.01F;
-
- lis->normalizeSpillWeight(li);
+ if (!hreg)
+ return 0;
+
+ if (TargetRegisterInfo::isVirtualRegister(hreg))
+ return sub == hsub ? hreg : 0;
+
+ const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+ // Only allow physreg hints in rc.
+ if (sub == 0)
+ return rc->contains(hreg) ? hreg : 0;
+
+ // reg:sub should match the physreg hreg.
+ return tri.getMatchingSuperReg(hreg, sub, rc);
+}
+
+void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+ MachineRegisterInfo &mri = mf_.getRegInfo();
+ const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo();
+ MachineBasicBlock *mbb = 0;
+ MachineLoop *loop = 0;
+ unsigned loopDepth = 0;
+ bool isExiting = false;
+ float totalWeight = 0;
+ SmallPtrSet<MachineInstr*, 8> visited;
+
+ // Find the best physreg hint and the best virtreg hint.
+ float bestPhys = 0, bestVirt = 0;
+ unsigned hintPhys = 0, hintVirt = 0;
+
+ // Don't recompute a target specific hint.
+ bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+ for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
+ MachineInstr *mi = I.skipInstruction();) {
+ if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+ continue;
+ if (!visited.insert(mi))
+ continue;
+
+ // Get loop info for mi.
+ if (mi->getParent() != mbb) {
+ mbb = mi->getParent();
+ loop = loops_.getLoopFor(mbb);
+ loopDepth = loop ? loop->getLoopDepth() : 0;
+ isExiting = loop ? loop->isLoopExiting(mbb) : false;
+ }
+
+ // Calculate instr weight.
+ bool reads, writes;
+ tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+ float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+ // Give extra weight to what looks like a loop induction variable update.
+ if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb))
+ weight *= 3;
+
+ totalWeight += weight;
+
+ // Get allocation hints from copies.
+ if (noHint || !mi->isCopy())
+ continue;
+ unsigned hint = copyHint(mi, li.reg, tri, mri);
+ if (!hint)
+ continue;
+ float hweight = hint_[hint] += weight;
+ if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+ if (hweight > bestPhys && lis_.isAllocatable(hint))
+ bestPhys = hweight, hintPhys = hint;
+ } else {
+ if (hweight > bestVirt)
+ bestVirt = hweight, hintVirt = hint;
}
}
-
- return false;
+
+ hint_.clear();
+
+ // Always prefer the physreg hint.
+ if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+ mri.setRegAllocationHint(li.reg, 0, hint);
+ // Weakly boost the spill weight of hinted registers.
+ totalWeight *= 1.01F;
+ }
+
+ // Mark li as unspillable if all live ranges are tiny.
+ if (li.isZeroLength()) {
+ li.markNotSpillable();
+ return;
+ }
+
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling. If none of the defs are
+ // loads, then it's potentially very cheap to re-materialize.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ bool isLoad = false;
+ SmallVector<LiveInterval*, 4> spillIs;
+ if (lis_.isReMaterializable(li, spillIs, isLoad)) {
+ if (isLoad)
+ totalWeight *= 0.9F;
+ else
+ totalWeight *= 0.5F;
+ }
+
+ li.weight = totalWeight;
+ lis_.normalizeSpillWeight(li);
}
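A worked pass through CalculateWeightAndHint, assuming the era's LiveIntervals::getSpillWeight is roughly (reads + writes) * 10^loopDepth (consult the header if that has moved): one read-modify-write use at loop depth 2 contributes (1 + 1) * 100 = 200; writing on a loop-exiting block while the interval is live out triples it to 600; a surviving copy hint boosts it to 600 * 1.01 = 606; and if every def is rematerializable via a load, the weight becomes 606 * 0.9 = 545.4 before normalizeSpillWeight runs.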
-/// Returns true if the given live interval is zero length.
-bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const {
- for (LiveInterval::Ranges::const_iterator
- i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
- if (i->end.getPrevIndex() > i->start)
- return false;
- return true;
+void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
+ MachineRegisterInfo &mri = mf_.getRegInfo();
+ const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo();
+ const TargetRegisterClass *orc = mri.getRegClass(reg);
+ SmallPtrSet<const TargetRegisterClass*,8> rcs;
+
+ for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg),
+ E = mri.reg_nodbg_end(); I != E; ++I) {
+ // The targets' regclass descriptions are not accurate enough for us to
+ // handle subregs. We need something similar to
+ // TRI::getMatchingSuperRegClass, but returning a super class instead of a
+ // sub class.
+ if (I.getOperand().getSubReg()) {
+ DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n');
+ return;
+ }
+ if (const TargetRegisterClass *rc =
+ I->getDesc().getRegClass(I.getOperandNo(), tri))
+ rcs.insert(rc);
+ }
+
+ // If we found no regclass constraints, just leave reg as is.
+ // In theory, we could inflate to the largest superclass of reg's existing
+ // class, but that might not be legal for the current cpu setting.
+ // This could happen if reg is only used by COPY instructions, so we may need
+ // to improve on this.
+ if (rcs.empty()) {
+ return;
+ }
+
+ // Compute the intersection of all classes in rcs.
+ // This ought to be independent of iteration order, but if the target register
+ // classes don't form a proper algebra, it is possible to get different
+ // results. The solution is to make sure the intersection of any two register
+ // classes is also a register class or the null set.
+ const TargetRegisterClass *rc = 0;
+ for (SmallPtrSet<const TargetRegisterClass*,8>::iterator I = rcs.begin(),
+ E = rcs.end(); I != E; ++I) {
+ rc = rc ? getCommonSubClass(rc, *I) : *I;
+ assert(rc && "Incompatible regclass constraints found");
+ }
+
+ if (rc == orc)
+ return;
+ DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to "
+ << rc->getName() <<".\n");
+ mri.setRegClass(reg, rc);
}
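The fold above computes the meet of every class the uses demand; each constraint can only narrow the set, so the result is the largest class every instruction accepts, and the vreg is inflated when that is bigger than its current class. A toy model with set intersection standing in for getCommonSubClass (register names hypothetical):

    #include <cassert>
    #include <set>
    #include <string>

    typedef std::set<std::string> RegSet;

    // Models getCommonSubClass: the common subclass of two classes behaves
    // like their intersection; empty means incompatible.
    static RegSet commonSubClass(const RegSet &A, const RegSet &B) {
      RegSet R;
      for (RegSet::const_iterator I = A.begin(), E = A.end(); I != E; ++I)
        if (B.count(*I))
          R.insert(*I);
      return R;
    }

    int main() {
      const char *All[] = { "eax", "ebx", "ecx", "edx", "esi", "edi" };
      RegSet GR32(All, All + 6); // hypothetical wide class
      RegSet ABCD(All, All + 4); // hypothetical narrow subclass
      // Two uses that both accept the wide class meet at the wide class, so a
      // vreg currently held to ABCD can be inflated to GR32.
      assert(commonSubClass(GR32, GR32) == GR32);
      // A use demanding the narrow class pins the meet to ABCD instead.
      assert(commonSubClass(GR32, ABCD) == ABCD);
      return 0;
    }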
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index e0e315c6c677f..91a9536e7757c 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -36,7 +36,7 @@ namespace {
public:
static char ID;
- CodePlacementOpt() : MachineFunctionPass(&ID) {}
+ CodePlacementOpt() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index e3746a9856447..335d2d8e9bac7 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -32,21 +32,21 @@ CriticalAntiDepBreaker(MachineFunction& MFi) :
MRI(MF.getRegInfo()),
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
- AllocatableSet(TRI->getAllocatableSet(MF))
-{
-}
+ AllocatableSet(TRI->getAllocatableSet(MF)),
+ Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
+ KillIndices(TRI->getNumRegs(), 0),
+ DefIndices(TRI->getNumRegs(), 0) {}
CriticalAntiDepBreaker::~CriticalAntiDepBreaker() {
}
void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
- // Clear out the register class data.
- std::fill(Classes, array_endof(Classes),
- static_cast<const TargetRegisterClass *>(0));
-
- // Initialize the indices to indicate that no registers are live.
const unsigned BBSize = BB->size();
- for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+ for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ // Clear out the register class data.
+ Classes[i] = static_cast<const TargetRegisterClass *>(0);
+
+ // Initialize the indices to indicate that no registers are live.
KillIndices[i] = ~0u;
DefIndices[i] = BBSize;
}
@@ -65,6 +65,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
+
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
@@ -86,6 +87,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
+
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
@@ -106,6 +108,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
+
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
@@ -134,8 +137,10 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
+
// Mark this register to be non-renamable.
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
// Move the def index to the end of the previous region, to reflect
// that the def could theoretically have been scheduled at the end.
DefIndices[Reg] = InsertPosIndex;
@@ -325,6 +330,8 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
RE = RC->allocation_order_end(MF); R != RE; ++R) {
unsigned NewReg = *R;
+ // Don't consider non-allocatable registers
+ if (!AllocatableSet.test(NewReg)) continue;
// Don't replace a register with itself.
if (NewReg == AntiDepReg) continue;
// Don't replace a register with one that was recently used to repair
@@ -433,7 +440,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// fix that remaining critical edge too. This is a little more involved,
// because unlike the most recent register, less recent registers should
// still be considered, though only if no other registers are available.
- unsigned LastNewReg[TargetRegisterInfo::FirstVirtualRegister] = {};
+ std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0);
// Attempt to break anti-dependence edges on the critical path. Walk the
// instructions from the bottom up, tracking information about liveness
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 540630083bcc6..0ed7c35b0f0ca 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -46,19 +46,18 @@ class TargetRegisterInfo;
/// corresponding value is null. If the register is live but used in
/// multiple register classes, the corresponding value is -1 casted to a
/// pointer.
- const TargetRegisterClass *
- Classes[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<const TargetRegisterClass*> Classes;
/// RegRegs - Map registers to all their references within a live range.
std::multimap<unsigned, MachineOperand *> RegRefs;
/// KillIndices - The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
- unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> KillIndices;
/// DefIndices - The index of the most recent complete def (proceeding bottom
/// up), or ~0u if the register is live.
- unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> DefIndices;
/// KeepRegs - A set of registers which are live and cannot be changed to
/// break anti-dependencies.
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index d69c995b3e037..318d922adebf1 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -36,7 +36,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- DeadMachineInstructionElim() : MachineFunctionPass(&ID) {}
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {}
private:
bool isDead(const MachineInstr *MI) const;
@@ -44,9 +44,8 @@ namespace {
}
char DeadMachineInstructionElim::ID = 0;
-static RegisterPass<DeadMachineInstructionElim>
-Y("dead-mi-elimination",
- "Remove dead machine instructions");
+INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
+ "Remove dead machine instructions", false, false);
FunctionPass *llvm::createDeadMachineInstructionElimPass() {
return new DeadMachineInstructionElim();
@@ -81,9 +80,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
- // Compute a bitvector to represent all non-allocatable physregs.
- BitVector NonAllocatableRegs = TRI->getAllocatableSet(MF);
- NonAllocatableRegs.flip();
+ // Treat reserved registers as always live.
+ BitVector ReservedRegs = TRI->getReservedRegs(MF);
// Loop over all instructions in all blocks, from bottom to top, so that it's
// more likely that chains of dependent but ultimately dead instructions will
@@ -92,9 +90,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
I != E; ++I) {
MachineBasicBlock *MBB = &*I;
- // Start out assuming that all non-allocatable registers are live
- // out of this block.
- LivePhysRegs = NonAllocatableRegs;
+ // Start out assuming that reserved registers are live out of this block.
+ LivePhysRegs = ReservedRegs;
// Also add any explicit live-out physregs for this block.
if (!MBB->empty() && MBB->back().getDesc().isReturn())
@@ -105,6 +102,10 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
LivePhysRegs.set(Reg);
}
+ // FIXME: Add live-ins from successors to LivePhysRegs. Normally, physregs
+ // are not live across blocks, but some targets (x86) can have flags live
+ // out of a block.
+
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(),
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 01b31b4209318..550fd3e25fb7a 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -25,19 +25,17 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
STATISTIC(NumLandingPadsSplit, "Number of landing pads split");
STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered");
STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
-STATISTIC(NumStackTempsIntroduced, "Number of stack temporaries introduced");
namespace {
class DwarfEHPrepare : public FunctionPass {
const TargetMachine *TM;
const TargetLowering *TLI;
- bool CompileFast;
// The eh.exception intrinsic.
Function *ExceptionValueIntrinsic;
@@ -54,9 +52,8 @@ namespace {
// _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
- // Dominator info is used when turning stack temporaries into registers.
+ // We both use and preserve dominator info.
DominatorTree *DT;
- DominanceFrontier *DF;
// The function we are running on.
Function *F;
@@ -65,28 +62,14 @@ namespace {
typedef SmallPtrSet<BasicBlock*, 8> BBSet;
BBSet LandingPads;
- // Stack temporary used to hold eh.exception values.
- AllocaInst *ExceptionValueVar;
-
bool NormalizeLandingPads();
bool LowerUnwinds();
bool MoveExceptionValueCalls();
- bool FinishStackTemporaries();
- bool PromoteStackTemporaries();
Instruction *CreateExceptionValueCall(BasicBlock *BB);
- Instruction *CreateValueLoad(BasicBlock *BB);
-
- /// CreateReadOfExceptionValue - Return the result of the eh.exception
- /// intrinsic by calling the intrinsic if in a landing pad, or loading it
- /// from the exception value variable otherwise.
- Instruction *CreateReadOfExceptionValue(BasicBlock *BB) {
- return LandingPads.count(BB) ?
- CreateExceptionValueCall(BB) : CreateValueLoad(BB);
- }
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
- /// use the ".llvm.eh.catch.all.value" call need to convert to using its
+ /// use the "llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
@@ -112,69 +95,19 @@ namespace {
bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
SmallPtrSet<IntrinsicInst*, 8> &SelCalls);
- /// DoMem2RegPromotion - Take an alloca call and promote it from memory to a
- /// register.
- bool DoMem2RegPromotion(Value *V) {
- AllocaInst *AI = dyn_cast<AllocaInst>(V);
- if (!AI || !isAllocaPromotable(AI)) return false;
-
- // Turn the alloca into a register.
- std::vector<AllocaInst*> Allocas(1, AI);
- PromoteMemToReg(Allocas, *DT, *DF);
- return true;
- }
-
- /// PromoteStoreInst - Perform Mem2Reg on a StoreInst.
- bool PromoteStoreInst(StoreInst *SI) {
- if (!SI || !DT || !DF) return false;
- if (DoMem2RegPromotion(SI->getOperand(1)))
- return true;
- return false;
- }
-
- /// PromoteEHPtrStore - Promote the storing of an EH pointer into a
- /// register. This should get rid of the store and subsequent loads.
- bool PromoteEHPtrStore(IntrinsicInst *II) {
- if (!DT || !DF) return false;
-
- bool Changed = false;
- StoreInst *SI;
-
- while (1) {
- SI = 0;
- for (Value::use_iterator
- I = II->use_begin(), E = II->use_end(); I != E; ++I) {
- SI = dyn_cast<StoreInst>(I);
- if (SI) break;
- }
-
- if (!PromoteStoreInst(SI))
- break;
-
- Changed = true;
- }
-
- return Changed;
- }
-
public:
static char ID; // Pass identification, replacement for typeid.
- DwarfEHPrepare(const TargetMachine *tm, bool fast) :
- FunctionPass(&ID), TM(tm), TLI(TM->getTargetLowering()),
- CompileFast(fast),
+ DwarfEHPrepare(const TargetMachine *tm) :
+ FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
URoR(0), EHCatchAllValue(0), RewindFunction(0) {}
virtual bool runOnFunction(Function &Fn);
- // getAnalysisUsage - We need dominance frontiers for memory promotion.
+ // getAnalysisUsage - We need the dominator tree for handling URoR.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- if (!CompileFast)
- AU.addRequired<DominatorTree>();
+ AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
- if (!CompileFast)
- AU.addRequired<DominanceFrontier>();
- AU.addPreserved<DominanceFrontier>();
}
const char *getPassName() const {
@@ -186,8 +119,8 @@ namespace {
char DwarfEHPrepare::ID = 0;
-FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) {
- return new DwarfEHPrepare(tm, fast);
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
+ return new DwarfEHPrepare(tm);
}
/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
@@ -207,7 +140,7 @@ FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
for (Value::use_iterator
I = SelectorIntrinsic->use_begin(),
E = SelectorIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *II = cast<IntrinsicInst>(I);
+ IntrinsicInst *II = cast<IntrinsicInst>(*I);
if (II->getParent()->getParent() != F)
continue;
@@ -225,13 +158,13 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
for (Value::use_iterator
I = URoR->use_begin(),
E = URoR->use_end(); I != E; ++I) {
- if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
URoRInvokes.insert(II);
}
}
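The recurring (I) to (*I) changes in this file track a tightening of Value::use_iterator: the iterator no longer converts implicitly to the user pointer, so the user must be fetched by dereferencing (the motivation is an assumption; the mechanical change is visible in the hunks). The resulting idiom:

    // Visit every user of V; *I yields the User, I alone no longer converts.
    static void visitUsers(Value *V) {
      for (Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I)
        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(*I))
          (void)II; // the caller's per-user handling goes here
    }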
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
-/// the ".llvm.eh.catch.all.value" call need to convert to using its
+/// the "llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
if (!EHCatchAllValue) return false;
@@ -247,7 +180,7 @@ bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
I = Sels.begin(), E = Sels.end(); I != E; ++I) {
IntrinsicInst *Sel = *I;
- // Index of the ".llvm.eh.catch.all.value" variable.
+ // Index of the "llvm.eh.catch.all.value" variable.
unsigned OpIdx = Sel->getNumArgOperands() - 1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
if (GV != EHCatchAllValue) continue;
@@ -268,10 +201,9 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
SmallPtrSet<PHINode*, 32> SeenPHIs;
bool Changed = false;
- restart:
for (Value::use_iterator
I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
- Instruction *II = dyn_cast<Instruction>(I);
+ Instruction *II = dyn_cast<Instruction>(*I);
if (!II || II->getParent()->getParent() != F) continue;
if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
@@ -282,11 +214,6 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
URoRInvoke = true;
} else if (CastInst *CI = dyn_cast<CastInst>(II)) {
Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
- if (!PromoteStoreInst(SI)) continue;
- Changed = true;
- SeenPHIs.clear();
- goto restart; // Uses may have changed, restart loop.
} else if (PHINode *PN = dyn_cast<PHINode>(II)) {
if (SeenPHIs.insert(PN))
// Don't process a PHI node more than once.
@@ -304,7 +231,7 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
bool DwarfEHPrepare::HandleURoRInvokes() {
if (!EHCatchAllValue) {
EHCatchAllValue =
- F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value");
+ F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
if (!EHCatchAllValue) return false;
}
@@ -318,10 +245,6 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
FindAllCleanupSelectors(Sels, CatchAllSels);
- if (!DT)
- // We require DominatorTree information.
- return CleanupSelectors(CatchAllSels);
-
if (!URoR) {
URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
if (!URoR) return CleanupSelectors(CatchAllSels);
@@ -338,7 +261,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
for (SmallPtrSet<InvokeInst*, 32>::iterator
UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
const BasicBlock *URoRBB = (*UI)->getParent();
- if (SelBB == URoRBB || DT->dominates(SelBB, URoRBB)) {
+ if (DT->dominates(SelBB, URoRBB)) {
SelsToConvert.insert(*SI);
break;
}
@@ -360,11 +283,9 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
for (Value::use_iterator
I = ExceptionValueIntrinsic->use_begin(),
E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(I);
+ IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
- Changed |= PromoteEHPtrStore(EHPtr);
-
bool URoRInvoke = false;
SmallPtrSet<IntrinsicInst*, 8> SelCalls;
Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls);
@@ -532,11 +453,8 @@ bool DwarfEHPrepare::NormalizeLandingPads() {
// Add a fallthrough from NewBB to the original landing pad.
BranchInst::Create(LPad, NewBB);
- // Now update DominatorTree and DominanceFrontier analysis information.
- if (DT)
- DT->splitBlock(NewBB);
- if (DF)
- DF->splitBlock(NewBB);
+ // Now update DominatorTree analysis information.
+ DT->splitBlock(NewBB);
// Remember the newly constructed landing pad. The original landing pad
// LPad is no longer a landing pad now that all unwind edges have been
@@ -586,7 +504,7 @@ bool DwarfEHPrepare::LowerUnwinds() {
// Create the call...
CallInst *CI = CallInst::Create(RewindFunction,
- CreateReadOfExceptionValue(TI->getParent()),
+ CreateExceptionValueCall(TI->getParent()),
"", TI);
CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
// ...followed by an UnreachableInst.
@@ -602,9 +520,11 @@ bool DwarfEHPrepare::LowerUnwinds() {
}
/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
-/// landing pads by replacing calls outside of landing pads with loads from a
-/// stack temporary. Move eh.exception calls inside landing pads to the start
-/// of the landing pad (optional, but may make things simpler for later passes).
+/// landing pads by replacing calls outside of landing pads with direct use of
+/// a register holding the appropriate value; this requires adding calls inside
+/// all landing pads to initialize the register. Also, move eh.exception calls
+/// inside landing pads to the start of the landing pad (optional, but may make
+/// things simpler for later passes).
bool DwarfEHPrepare::MoveExceptionValueCalls() {
// If the eh.exception intrinsic is not declared in the module then there is
// nothing to do. Speed up compilation by checking for this common case.
@@ -614,61 +534,87 @@ bool DwarfEHPrepare::MoveExceptionValueCalls() {
bool Changed = false;
- for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
- if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
- if (CI->getIntrinsicID() == Intrinsic::eh_exception) {
- if (!CI->use_empty()) {
- Value *ExceptionValue = CreateReadOfExceptionValue(BB);
- if (CI == ExceptionValue) {
- // The call was at the start of a landing pad - leave it alone.
- assert(LandingPads.count(BB) &&
- "Created eh.exception call outside landing pad!");
- continue;
- }
- CI->replaceAllUsesWith(ExceptionValue);
- }
- CI->eraseFromParent();
- ++NumExceptionValuesMoved;
- Changed = true;
+ // Move calls to eh.exception that are inside a landing pad to the start of
+ // the landing pad.
+ for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI) {
+ BasicBlock *LP = *LI;
+ for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception.
+ if (!EI->use_empty()) {
+ // If there is already a call to eh.exception at the start of the
+ // landing pad, then get hold of it; otherwise create such a call.
+ Value *CallAtStart = CreateExceptionValueCall(LP);
+
+ // If the call was at the start of a landing pad then leave it alone.
+ if (EI == CallAtStart)
+ continue;
+ EI->replaceAllUsesWith(CallAtStart);
}
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ Changed = true;
+ }
}
- return Changed;
-}
-
-/// FinishStackTemporaries - If we introduced a stack variable to hold the
-/// exception value then initialize it in each landing pad.
-bool DwarfEHPrepare::FinishStackTemporaries() {
- if (!ExceptionValueVar)
- // Nothing to do.
- return false;
+ // Look for calls to eh.exception that are not in a landing pad. If one is
+ // found, then a register that holds the exception value will be created in
+ // each landing pad, and the SSAUpdater will be used to compute the values
+ // returned by eh.exception calls outside of landing pads.
+ SSAUpdater SSA;
+
+ // Remember where we found the eh.exception call, to avoid rescanning earlier
+ // basic blocks which we already know contain no eh.exception calls.
+ bool FoundCallOutsideLandingPad = false;
+ Function::iterator BB = F->begin();
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
- bool Changed = false;
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE; ++II)
+ if (isa<EHExceptionInst>(II)) {
+ SSA.Initialize(II->getType(), II->getName());
+ FoundCallOutsideLandingPad = true;
+ break;
+ }
- // Make sure that there is a store of the exception value at the start of
- // each landing pad.
- for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
- LI != LE; ++LI) {
- Instruction *ExceptionValue = CreateReadOfExceptionValue(*LI);
- Instruction *Store = new StoreInst(ExceptionValue, ExceptionValueVar);
- Store->insertAfter(ExceptionValue);
- Changed = true;
+ if (FoundCallOutsideLandingPad)
+ break;
}
- return Changed;
-}
+ // If all calls to eh.exception are in landing pads then we are done.
+ if (!FoundCallOutsideLandingPad)
+ return Changed;
-/// PromoteStackTemporaries - Turn any stack temporaries we introduced into
-/// registers if possible.
-bool DwarfEHPrepare::PromoteStackTemporaries() {
- if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) {
- // Turn the exception temporary into registers and phi nodes if possible.
- std::vector<AllocaInst*> Allocas(1, ExceptionValueVar);
- PromoteMemToReg(Allocas, *DT, *DF);
- return true;
+ // Add a call to eh.exception at the start of each landing pad, and tell the
+ // SSAUpdater that this is the value produced by the landing pad.
+ for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
+ LI != LE; ++LI)
+ SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI));
+
+ // Now turn all calls to eh.exception that are not in a landing pad into a use
+ // of the appropriate register.
+ for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+ // Skip over landing pads.
+ if (LandingPads.count(BB))
+ continue;
+
+ for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+ II != IE;)
+ if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+ // Found a call to eh.exception, replace it with the value from any
+ // upstream landing pad(s).
+ EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB));
+ EI->eraseFromParent();
+ ++NumExceptionValuesMoved;
+ }
}
- return false;
+
+ return true;
}
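// Editor's sketch (not part of this patch): the SSAUpdater protocol used by
// MoveExceptionValueCalls above, reduced to its three calls. `Ty`, `ExnCall`,
// `LandingPad` and `UseBB` are placeholder names for the client's values.
//
//   #include "llvm/Transforms/Utils/SSAUpdater.h"
//
//   SSAUpdater SSA;
//   SSA.Initialize(Ty, "exn");                   // value type and base name
//   SSA.AddAvailableValue(LandingPad, ExnCall);  // one definition per pad
//   Value *V = SSA.GetValueAtEndOfBlock(UseBB);  // PHIs inserted as needed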
/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
@@ -691,36 +637,11 @@ Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
}
-/// CreateValueLoad - Insert a load of the exception value stack variable
-/// (creating it if necessary) at the start of the basic block (unless
-/// there already is a load, in which case the existing load is returned).
-Instruction *DwarfEHPrepare::CreateValueLoad(BasicBlock *BB) {
- Instruction *Start = BB->getFirstNonPHIOrDbg();
- // Is this a load of the exception temporary?
- if (ExceptionValueVar)
- if (LoadInst* LI = dyn_cast<LoadInst>(Start))
- if (LI->getPointerOperand() == ExceptionValueVar)
- // Reuse the existing load.
- return Start;
-
- // Create the temporary if we didn't already.
- if (!ExceptionValueVar) {
- ExceptionValueVar = new AllocaInst(PointerType::getUnqual(
- Type::getInt8Ty(BB->getContext())), "eh.value", F->begin()->begin());
- ++NumStackTempsIntroduced;
- }
-
- // Load the value.
- return new LoadInst(ExceptionValueVar, "eh.value.load", Start);
-}
-
bool DwarfEHPrepare::runOnFunction(Function &Fn) {
bool Changed = false;
// Initialize internal state.
- DT = getAnalysisIfAvailable<DominatorTree>();
- DF = getAnalysisIfAvailable<DominanceFrontier>();
- ExceptionValueVar = 0;
+ DT = &getAnalysis<DominatorTree>();
F = &Fn;
// Ensure that only unwind edges end at landing pads (a landing pad is a
@@ -735,13 +656,6 @@ bool DwarfEHPrepare::runOnFunction(Function &Fn) {
// Move eh.exception calls to landing pads.
Changed |= MoveExceptionValueCalls();
- // Initialize any stack temporaries we introduced.
- Changed |= FinishStackTemporaries();
-
- // Turn any stack temporaries into registers if possible.
- if (!CompileFast)
- Changed |= PromoteStackTemporaries();
-
Changed |= HandleURoRInvokes();
LandingPads.clear();
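// Editor's note (hedged): the switch above from getAnalysisIfAvailable to
// getAnalysis<DominatorTree>() is only legal if the pass declares the
// dependency, presumably done elsewhere in this patch, e.g.:
//
//   void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<DominatorTree>();
//   }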
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index cb5a8c0eae1d3..fb884c9e8b712 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -22,36 +22,12 @@
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Support/ELF.h"
#include "llvm/System/DataTypes.h"
namespace llvm {
class GlobalValue;
- // Identification Indexes
- enum {
- EI_MAG0 = 0,
- EI_MAG1 = 1,
- EI_MAG2 = 2,
- EI_MAG3 = 3
- };
-
- // File types
- enum {
- ET_NONE = 0, // No file type
- ET_REL = 1, // Relocatable file
- ET_EXEC = 2, // Executable file
- ET_DYN = 3, // Shared object file
- ET_CORE = 4, // Core file
- ET_LOPROC = 0xff00, // Beginning of processor-specific codes
- ET_HIPROC = 0xffff // Processor-specific
- };
-
- // Versioning
- enum {
- EV_NONE = 0,
- EV_CURRENT = 1
- };
-
/// ELFSym - This struct contains information about each symbol that is
/// added to the logical symbol table for the module. This is eventually
/// turned into a real symbol table in the file.
@@ -108,9 +84,9 @@ namespace llvm {
static ELFSym *getExtSym(const char *Ext) {
ELFSym *Sym = new ELFSym();
Sym->Source.Ext = Ext;
- Sym->setBind(STB_GLOBAL);
- Sym->setType(STT_NOTYPE);
- Sym->setVisibility(STV_DEFAULT);
+ Sym->setBind(ELF::STB_GLOBAL);
+ Sym->setType(ELF::STT_NOTYPE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
Sym->SourceType = isExtSym;
return Sym;
}
@@ -118,9 +94,9 @@ namespace llvm {
// getSectionSym - Returns an ELF symbol to represent an ELF section
static ELFSym *getSectionSym() {
ELFSym *Sym = new ELFSym();
- Sym->setBind(STB_LOCAL);
- Sym->setType(STT_SECTION);
- Sym->setVisibility(STV_DEFAULT);
+ Sym->setBind(ELF::STB_LOCAL);
+ Sym->setType(ELF::STT_SECTION);
+ Sym->setVisibility(ELF::STV_DEFAULT);
Sym->SourceType = isOther;
return Sym;
}
@@ -128,9 +104,9 @@ namespace llvm {
// getFileSym - Returns an ELF symbol to represent the module identifier
static ELFSym *getFileSym() {
ELFSym *Sym = new ELFSym();
- Sym->setBind(STB_LOCAL);
- Sym->setType(STT_FILE);
- Sym->setVisibility(STV_DEFAULT);
+ Sym->setBind(ELF::STB_LOCAL);
+ Sym->setType(ELF::STT_FILE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS;
Sym->SourceType = isOther;
return Sym;
@@ -141,8 +117,8 @@ namespace llvm {
ELFSym *Sym = new ELFSym();
Sym->Source.GV = GV;
Sym->setBind(Bind);
- Sym->setType(STT_NOTYPE);
- Sym->setVisibility(STV_DEFAULT);
+ Sym->setType(ELF::STT_NOTYPE);
+ Sym->setVisibility(ELF::STV_DEFAULT);
Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF;
Sym->SourceType = isGV;
return Sym;
@@ -159,35 +135,14 @@ namespace llvm {
// Symbol index into the Symbol table
unsigned SymTabIdx;
- enum {
- STB_LOCAL = 0, // Local sym, not visible outside obj file containing def
- STB_GLOBAL = 1, // Global sym, visible to all object files being combined
- STB_WEAK = 2 // Weak symbol, like global but lower-precedence
- };
-
- enum {
- STT_NOTYPE = 0, // Symbol's type is not specified
- STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.)
- STT_FUNC = 2, // Symbol is executable code (function, etc.)
- STT_SECTION = 3, // Symbol refers to a section
- STT_FILE = 4 // Local, absolute symbol that refers to a file
- };
-
- enum {
- STV_DEFAULT = 0, // Visibility is specified by binding type
- STV_INTERNAL = 1, // Defined by processor supplements
- STV_HIDDEN = 2, // Not visible to other components
- STV_PROTECTED = 3 // Visible in other components but not preemptable
- };
-
ELFSym() : SourceType(isOther), NameIdx(0), Value(0),
- Size(0), Info(0), Other(STV_DEFAULT), SectionIdx(0),
+ Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0),
SymTabIdx(0) {}
unsigned getBind() const { return (Info >> 4) & 0xf; }
unsigned getType() const { return Info & 0xf; }
- bool isLocalBind() const { return getBind() == STB_LOCAL; }
- bool isFileType() const { return getType() == STT_FILE; }
+ bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; }
+ bool isFileType() const { return getType() == ELF::STT_FILE; }
void setBind(unsigned X) {
assert(X == (X & 0xF) && "Bind value out of range!");
@@ -222,51 +177,6 @@ namespace llvm {
unsigned Align; // sh_addralign - Alignment of section.
unsigned EntSize; // sh_entsize - Size of entries in the section
- // Section Header Flags
- enum {
- SHF_WRITE = 1 << 0, // Writable
- SHF_ALLOC = 1 << 1, // Mapped into the process addr space
- SHF_EXECINSTR = 1 << 2, // Executable
- SHF_MERGE = 1 << 4, // Might be merged if equal
- SHF_STRINGS = 1 << 5, // Contains null-terminated strings
- SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index
- SHF_LINK_ORDER = 1 << 7, // Preserve order after combining
- SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required
- SHF_GROUP = 1 << 9, // Section is a member of a group
- SHF_TLS = 1 << 10 // Section holds thread-local data
- };
-
- // Section Types
- enum {
- SHT_NULL = 0, // No associated section (inactive entry).
- SHT_PROGBITS = 1, // Program-defined contents.
- SHT_SYMTAB = 2, // Symbol table.
- SHT_STRTAB = 3, // String table.
- SHT_RELA = 4, // Relocation entries; explicit addends.
- SHT_HASH = 5, // Symbol hash table.
- SHT_DYNAMIC = 6, // Information for dynamic linking.
- SHT_NOTE = 7, // Information about the file.
- SHT_NOBITS = 8, // Data occupies no space in the file.
- SHT_REL = 9, // Relocation entries; no explicit addends.
- SHT_SHLIB = 10, // Reserved.
- SHT_DYNSYM = 11, // Symbol table.
- SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type.
- SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
- SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
- SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
- };
-
- // Special section indices.
- enum {
- SHN_UNDEF = 0, // Undefined, missing, irrelevant
- SHN_LORESERVE = 0xff00, // Lowest reserved index
- SHN_LOPROC = 0xff00, // Lowest processor-specific index
- SHN_HIPROC = 0xff1f, // Highest processor-specific index
- SHN_ABS = 0xfff1, // Symbol has absolute value; no relocation
- SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables
- SHN_HIRESERVE = 0xffff // Highest reserved index
- };
-
/// SectionIdx - The number of the section in the Section Table.
unsigned short SectionIdx;
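// Editor's sketch (illustration only): ELFSym above keeps st_info packed the
// way the ELF spec defines it -- binding in the high nibble, type in the low
// nibble -- which is exactly what getBind()/getType() decode:
//
//   unsigned char Info = (ELF::STB_GLOBAL << 4) | ELF::STT_FUNC;
//   unsigned Bind = (Info >> 4) & 0xf;  // ELF::STB_GLOBAL
//   unsigned Type = Info & 0xf;         // ELF::STT_FUNC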
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 36b0e6514b3a9..3fb087c5ea8b8 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -71,7 +71,7 @@ void ELFCodeEmitter::startFunction(MachineFunction &MF) {
bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
// Add a symbol to represent the function.
const Function *F = MF.getFunction();
- ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELFSym::STT_FUNC,
+ ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC,
EW.getGlobalELFVisibility(F));
FnSym->SectionIdx = ES->SectionIdx;
FnSym->Size = ES->getCurrentPCOffset()-FnStartOff;
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index b644ebeb4be53..d14728d8a36c9 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -63,7 +63,7 @@ char ELFWriter::ID = 0;
//===----------------------------------------------------------------------===//
ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
- : MachineFunctionPass(&ID), O(o), TM(tm),
+ : MachineFunctionPass(ID), O(o), TM(tm),
OutContext(*new MCContext(*TM.getMCAsmInfo())),
TLOF(TM.getTargetLowering()->getObjFileLowering()),
is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
@@ -129,12 +129,12 @@ bool ELFWriter::doInitialization(Module &M) {
ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA]
- ElfHdr.emitByte(EV_CURRENT); // e_ident[EI_VERSION]
+ ElfHdr.emitByte(ELF::EV_CURRENT); // e_ident[EI_VERSION]
ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD]
- ElfHdr.emitWord16(ET_REL); // e_type
+ ElfHdr.emitWord16(ELF::ET_REL); // e_type
ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
- ElfHdr.emitWord32(EV_CURRENT); // e_version
+ ElfHdr.emitWord32(ELF::EV_CURRENT); // e_version
ElfHdr.emitWord(0); // e_entry, no entry point in .o file
ElfHdr.emitWord(0); // e_phoff, no program header for .o
ELFHdr_e_shoff_Offset = ElfHdr.size();
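// Editor's note (sketch, not part of this patch): the bytes emitted above
// fill in the fixed ELF identification array, now using the shared
// llvm/Support/ELF.h constants:
//
//   e_ident[EI_MAG0..EI_MAG3]  0x7f 'E' 'L' 'F'  (magic, emitted earlier)
//   e_ident[EI_CLASS]          from TEW->getEIClass() (32- vs 64-bit)
//   e_ident[EI_DATA]           from TEW->getEIData() (endianness)
//   e_ident[EI_VERSION]        ELF::EV_CURRENT
//   e_ident[...EI_NIDENT)      zero padding up to 16 bytes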
@@ -252,7 +252,7 @@ ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) {
// is true if the relocation section contains entries with addends.
ELFSection &ELFWriter::getRelocSection(ELFSection &S) {
unsigned SectionType = TEW->hasRelocationAddend() ?
- ELFSection::SHT_RELA : ELFSection::SHT_REL;
+ ELF::SHT_RELA : ELF::SHT_REL;
std::string SectionName(".rel");
if (TEW->hasRelocationAddend())
@@ -268,11 +268,11 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
default:
llvm_unreachable("unknown visibility type");
case GlobalValue::DefaultVisibility:
- return ELFSym::STV_DEFAULT;
+ return ELF::STV_DEFAULT;
case GlobalValue::HiddenVisibility:
- return ELFSym::STV_HIDDEN;
+ return ELF::STV_HIDDEN;
case GlobalValue::ProtectedVisibility:
- return ELFSym::STV_PROTECTED;
+ return ELF::STV_PROTECTED;
}
return 0;
}
@@ -280,23 +280,23 @@ unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
// getGlobalELFBinding - Returns the ELF specific binding type
unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
if (GV->hasInternalLinkage())
- return ELFSym::STB_LOCAL;
+ return ELF::STB_LOCAL;
if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
- return ELFSym::STB_WEAK;
+ return ELF::STB_WEAK;
- return ELFSym::STB_GLOBAL;
+ return ELF::STB_GLOBAL;
}
// getGlobalELFType - Returns the ELF specific type for a global
unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
if (GV->isDeclaration())
- return ELFSym::STT_NOTYPE;
+ return ELF::STT_NOTYPE;
if (isa<Function>(GV))
- return ELFSym::STT_FUNC;
+ return ELF::STT_FUNC;
- return ELFSym::STT_OBJECT;
+ return ELF::STT_OBJECT;
}
// IsELFUndefSym - True if the global value must be marked as a symbol
@@ -364,7 +364,7 @@ void ELFWriter::EmitGlobal(const GlobalValue *GV) {
GblSym->Size = Size;
if (S->HasCommonSymbols()) { // Symbol must go to a common section
- GblSym->SectionIdx = ELFSection::SHN_COMMON;
+ GblSym->SectionIdx = ELF::SHN_COMMON;
// A new linkonce section is created for each global in the
// common section, the default alignment is 1 and the symbol
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index db66ecc6dd83b..b8bac5598ecfc 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -39,6 +39,7 @@ namespace llvm {
class raw_ostream;
class SectionKind;
class MCContext;
+ class TargetMachine;
typedef std::vector<ELFSym*>::iterator ELFSymIter;
typedef std::vector<ELFSection*>::iterator ELFSectionIter;
@@ -160,29 +161,29 @@ namespace llvm {
SN->SectionIdx = NumSections++;
SN->Type = Type;
SN->Flags = Flags;
- SN->Link = ELFSection::SHN_UNDEF;
+ SN->Link = ELF::SHN_UNDEF;
SN->Align = Align;
return *SN;
}
ELFSection &getNonExecStackSection() {
- return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1);
+ return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1);
}
ELFSection &getSymbolTableSection() {
- return getSection(".symtab", ELFSection::SHT_SYMTAB, 0);
+ return getSection(".symtab", ELF::SHT_SYMTAB, 0);
}
ELFSection &getStringTableSection() {
- return getSection(".strtab", ELFSection::SHT_STRTAB, 0, 1);
+ return getSection(".strtab", ELF::SHT_STRTAB, 0, 1);
}
ELFSection &getSectionHeaderStringTableSection() {
- return getSection(".shstrtab", ELFSection::SHT_STRTAB, 0, 1);
+ return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1);
}
ELFSection &getNullSection() {
- return getSection("", ELFSection::SHT_NULL, 0);
+ return getSection("", ELF::SHT_NULL, 0);
}
ELFSection &getDataSection();
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index ab0a800225311..0f6e882a7be43 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -30,8 +30,8 @@ namespace {
raw_ostream &OS;
public:
- Printer() : FunctionPass(&ID), OS(errs()) {}
- explicit Printer(raw_ostream &OS) : FunctionPass(&ID), OS(OS) {}
+ Printer() : FunctionPass(ID), OS(errs()) {}
+ explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
const char *getPassName() const;
@@ -55,8 +55,8 @@ namespace {
}
-static RegisterPass<GCModuleInfo>
-X("collector-metadata", "Create Garbage Collector Module Metadata");
+INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
+ "Create Garbage Collector Module Metadata", false, false);
// -----------------------------------------------------------------------------
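// Editor's note (hedged sketch): INITIALIZE_PASS is the new registration
// macro replacing file-scope RegisterPass<> objects throughout this patch;
// its last two flags mark a CFG-only pass and an analysis pass respectively:
//
//   INITIALIZE_PASS(PassClass, "cli-name", "Human-readable pass name",
//                   /*CFGOnly=*/false, /*IsAnalysis=*/false);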
@@ -70,7 +70,7 @@ GCFunctionInfo::~GCFunctionInfo() {}
char GCModuleInfo::ID = 0;
GCModuleInfo::GCModuleInfo()
- : ImmutablePass(&ID) {}
+ : ImmutablePass(ID) {}
GCModuleInfo::~GCModuleInfo() {
clear();
@@ -189,7 +189,7 @@ FunctionPass *llvm::createGCInfoDeleter() {
return new Deleter();
}
-Deleter::Deleter() : FunctionPass(&ID) {}
+Deleter::Deleter() : FunctionPass(ID) {}
const char *Deleter::getPassName() const {
return "Delete Garbage Collector Information";
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 71506cc6abb9c..719fa194d8da1 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -130,7 +130,7 @@ FunctionPass *llvm::createGCLoweringPass() {
char LowerIntrinsics::ID = 0;
LowerIntrinsics::LowerIntrinsics()
- : FunctionPass(&ID) {}
+ : FunctionPass(ID) {}
const char *LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
@@ -260,7 +260,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
bool LowerRd = !S.customReadBarrier();
bool InitRoots = S.initializeRoots();
- SmallVector<AllocaInst*,32> Roots;
+ SmallVector<AllocaInst*, 32> Roots;
bool MadeChange = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
@@ -271,7 +271,8 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
case Intrinsic::gcwrite:
if (LowerWr) {
// Replace a write barrier with a simple store.
- Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
+ Value *St = new StoreInst(CI->getArgOperand(0),
+ CI->getArgOperand(2), CI);
CI->replaceAllUsesWith(St);
CI->eraseFromParent();
}
@@ -317,7 +318,7 @@ FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
char MachineCodeAnalysis::ID = 0;
MachineCodeAnalysis::MachineCodeAnalysis()
- : MachineFunctionPass(&ID) {}
+ : MachineFunctionPass(ID) {}
const char *MachineCodeAnalysis::getPassName() const {
return "Analyze Machine Code For Garbage Collection";
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 6b445e0b8e0f0..0ea30d7a7929d 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -154,7 +154,7 @@ namespace {
int FnNum;
public:
static char ID;
- IfConverter() : MachineFunctionPass(&ID), FnNum(-1) {}
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "If Converter"; }
@@ -230,8 +230,7 @@ namespace {
char IfConverter::ID = 0;
}
-static RegisterPass<IfConverter>
-X("if-converter", "If Converter");
+INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false);
FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 12adcaa3a22ee..b965bfdcf3b84 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -14,10 +14,12 @@
#define DEBUG_TYPE "spiller"
#include "Spiller.h"
+#include "SplitKit.h"
#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -28,8 +30,10 @@ using namespace llvm;
namespace {
class InlineSpiller : public Spiller {
+ MachineFunctionPass &pass_;
MachineFunction &mf_;
LiveIntervals &lis_;
+ MachineLoopInfo &loops_;
VirtRegMap &vrm_;
MachineFrameInfo &mfi_;
MachineRegisterInfo &mri_;
@@ -37,9 +41,11 @@ class InlineSpiller : public Spiller {
const TargetRegisterInfo &tri_;
const BitVector reserved_;
+ SplitAnalysis splitAnalysis_;
+
// Variables that are valid during spill(), but used by multiple methods.
LiveInterval *li_;
- std::vector<LiveInterval*> *newIntervals_;
+ SmallVectorImpl<LiveInterval*> *newIntervals_;
const TargetRegisterClass *rc_;
int stackSlot_;
const SmallVectorImpl<LiveInterval*> *spillIs_;
@@ -53,25 +59,34 @@ class InlineSpiller : public Spiller {
~InlineSpiller() {}
public:
- InlineSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
- : mf_(*mf), lis_(*lis), vrm_(*vrm),
- mfi_(*mf->getFrameInfo()),
- mri_(mf->getRegInfo()),
- tii_(*mf->getTarget().getInstrInfo()),
- tri_(*mf->getTarget().getRegisterInfo()),
- reserved_(tri_.getReservedRegs(mf_)) {}
+ InlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm)
+ : pass_(pass),
+ mf_(mf),
+ lis_(pass.getAnalysis<LiveIntervals>()),
+ loops_(pass.getAnalysis<MachineLoopInfo>()),
+ vrm_(vrm),
+ mfi_(*mf.getFrameInfo()),
+ mri_(mf.getRegInfo()),
+ tii_(*mf.getTarget().getInstrInfo()),
+ tri_(*mf.getTarget().getRegisterInfo()),
+ reserved_(tri_.getReservedRegs(mf_)),
+ splitAnalysis_(mf, lis_, loops_) {}
void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex);
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs);
private:
+ bool split();
+
bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
SlotIndex UseIdx);
bool reMaterializeFor(MachineBasicBlock::iterator MI);
void reMaterializeAll();
+ bool coalesceStackAccess(MachineInstr *MI);
bool foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops);
void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
@@ -80,12 +95,43 @@ private:
}
namespace llvm {
-Spiller *createInlineSpiller(MachineFunction *mf,
- LiveIntervals *lis,
- const MachineLoopInfo *mli,
- VirtRegMap *vrm) {
- return new InlineSpiller(mf, lis, vrm);
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
+ return new InlineSpiller(pass, mf, vrm);
+}
}
+
+/// split - try splitting the current interval into pieces that may allocate
+/// separately. Return true if successful.
+bool InlineSpiller::split() {
+ splitAnalysis_.analyze(li_);
+
+ if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) {
+ // We can split, but li_ may be left intact with fewer uses.
+ if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
+ .splitAroundLoop(loop))
+ return true;
+ }
+
+ // Try splitting into single block intervals.
+ SplitAnalysis::BlockPtrSet blocks;
+ if (splitAnalysis_.getMultiUseBlocks(blocks)) {
+ if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
+ .splitSingleBlocks(blocks))
+ return true;
+ }
+
+ // Try splitting inside a basic block.
+ if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) {
+ if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
+ .splitInsideBlock(MBB))
+ return true;
+ }
+
+ // We may have been able to split out some uses, but the original interval is
+ // intact, and it should still be spilled.
+ return false;
}
/// allUsesAvailableAt - Return true if all registers used by OrigMI at
@@ -237,7 +283,7 @@ void InlineSpiller::reMaterializeAll() {
lis_.RemoveMachineInstrFromMaps(DefMI);
vrm_.RemoveMachineInstrFromMaps(DefMI);
DefMI->eraseFromParent();
- li_->removeValNo(VNI);
+ VNI->setIsDefAccurate(false);
anyRemoved = true;
}
@@ -253,8 +299,8 @@ void InlineSpiller::reMaterializeAll() {
MachineBasicBlock::iterator NextMI = MI;
++NextMI;
if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
- SlotIndex NearIdx = lis_.getInstructionIndex(NextMI);
- if (li_->liveAt(NearIdx))
+ VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
+ if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
continue;
}
DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
@@ -262,6 +308,24 @@ void InlineSpiller::reMaterializeAll() {
}
}
+/// If MI is a load or store of stackSlot_, it can be removed.
+bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
+ int FI = 0;
+ unsigned reg;
+ if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) &&
+ !(reg = tii_.isStoreToStackSlot(MI, FI)))
+ return false;
+
+ // We have a stack access. Is it the right register and slot?
+ if (reg != li_->reg || FI != stackSlot_)
+ return false;
+
+ DEBUG(dbgs() << "Coalescing stack access: " << *MI);
+ lis_.RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ return true;
+}
+
/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
/// Return true on success, and MI will be erased.
bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
@@ -323,9 +387,8 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI,
}
void InlineSpiller::spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex) {
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs) {
DEBUG(dbgs() << "Inline spilling " << *li << "\n");
assert(li->isSpillable() && "Attempting to spill already spilled value.");
assert(!li->isStackSlot() && "Trying to spill a stack slot.");
@@ -335,13 +398,18 @@ void InlineSpiller::spill(LiveInterval *li,
rc_ = mri_.getRegClass(li->reg);
spillIs_ = &spillIs;
+ if (split())
+ return;
+
reMaterializeAll();
// Remat may handle everything.
if (li_->empty())
return;
- stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
+ stackSlot_ = vrm_.getStackSlot(li->reg);
+ if (stackSlot_ == VirtRegMap::NO_STACK_SLOT)
+ stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
// Iterate over instructions using register.
for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg);
@@ -365,6 +433,10 @@ void InlineSpiller::spill(LiveInterval *li,
continue;
}
+ // Stack slot accesses may coalesce away.
+ if (coalesceStackAccess(MI))
+ continue;
+
// Analyze instruction.
bool Reads, Writes;
SmallVector<unsigned, 8> Ops;
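// Editor's sketch (illustration only): a register allocator driving the new
// spiller interface after this patch; `spiller`, `li` and the two vectors are
// assumed to exist in the caller.
//
//   SmallVector<LiveInterval*, 8> NewIntervals;
//   SmallVector<LiveInterval*, 8> SpillIs;
//   spiller->spill(li, NewIntervals, SpillIs);
//   // NewIntervals now holds any split or spill products to re-enqueue.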
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 03ae214ae7dab..3852ebaf64253 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -481,7 +481,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
Value *Ops[3];
Ops[0] = CI->getArgOperand(0);
// Extend the amount to i32.
- Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context),
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt32Ty(Context),
/* isSigned */ false);
Ops[2] = Size;
ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index bf3137e49536e..36038027b2594 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -85,7 +85,7 @@ static bool getVerboseAsm() {
case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
case cl::BOU_TRUE: return true;
case cl::BOU_FALSE: return false;
- }
+ }
}
// Enable or disable FastISel. Both options are needed, because
@@ -139,8 +139,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
// Create a code emitter if asked to show the encoding.
- //
- // FIXME: These are currently leaked.
MCCodeEmitter *MCE = 0;
if (ShowMCEncoding)
MCE = getTarget().createCodeEmitter(*this, *Context);
@@ -154,8 +152,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- //
- // FIXME: These are currently leaked.
MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
if (MCE == 0 || TAB == 0)
@@ -180,12 +176,12 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
if (Printer == 0)
return true;
-
+
// If successful, createAsmPrinter took ownership of AsmStreamer.
AsmStreamer.take();
-
+
PM.add(Printer);
-
+
// Make sure the code model is set.
setCodeModelForStatic();
PM.add(createGCInfoDeleter());
@@ -204,7 +200,7 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
bool DisableVerify) {
// Make sure the code model is set.
setCodeModelForJIT();
-
+
// Add common CodeGen passes.
MCContext *Ctx = 0;
if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
@@ -216,19 +212,36 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
return false; // success!
}
+/// addPassesToEmitMC - Add passes to the specified pass manager to get
+/// machine code emitted with the MCJIT. This method returns true if machine
+/// code is not supported. It fills the MCContext Ctx pointer, which can be
+/// used to build a custom MCStreamer.
+///
+bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
+ MCContext *&Ctx,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
+ // Add common CodeGen passes.
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx))
+ return true;
+ // Make sure the code model is set.
+ setCodeModelForJIT();
+
+ return false; // success!
+}
+
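// Editor's sketch (not part of this patch): how an MCJIT-style client might
// drive the new hook; `TM` (an LLVMTargetMachine) and the pass manager `PM`
// are assumed to exist.
//
//   MCContext *Ctx = 0;
//   if (TM.addPassesToEmitMC(PM, Ctx, CodeGenOpt::Default, false))
//     report_fatal_error("target does not support MC emission");
//   // Ctx can now be used to construct a custom MCStreamer.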
static void printNoVerify(PassManagerBase &PM, const char *Banner) {
if (PrintMachineCode)
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
}
static void printAndVerify(PassManagerBase &PM,
- const char *Banner,
- bool allowDoubleDefs = false) {
+ const char *Banner) {
if (PrintMachineCode)
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
- PM.add(createMachineVerifierPass(allowDoubleDefs));
+ PM.add(createMachineVerifierPass());
}
/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
@@ -258,6 +271,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
}
+ PM.add(createGCLoweringPass());
+
+ // Make sure that no unreachable blocks are instruction selected.
+ PM.add(createUnreachableBlockEliminationPass());
+
// Turn exception handling constructs into something the code generators can
// handle.
switch (getMCAsmInfo()->getExceptionHandlingType()) {
@@ -269,26 +287,25 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// pad is shared by multiple invokes and is also a target of a normal
// edge from elsewhere.
PM.add(createSjLjEHPass(getTargetLowering()));
- PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None));
- break;
+ // FALLTHROUGH
case ExceptionHandling::Dwarf:
- PM.add(createDwarfEHPass(this, OptLevel==CodeGenOpt::None));
+ PM.add(createDwarfEHPass(this));
break;
case ExceptionHandling::None:
PM.add(createLowerInvokePass(getTargetLowering()));
+
+ // The lower invoke pass may create unreachable code. Remove it.
+ PM.add(createUnreachableBlockEliminationPass());
break;
}
- PM.add(createGCLoweringPass());
-
- // Make sure that no unreachable blocks are instruction selected.
- PM.add(createUnreachableBlockEliminationPass());
-
if (OptLevel != CodeGenOpt::None && !DisableCGP)
PM.add(createCodeGenPreparePass(getTargetLowering()));
PM.add(createStackProtectorPass(getTargetLowering()));
+ addPreISel(PM, OptLevel);
+
if (PrintISelInput)
PM.add(createPrintFunctionPass("\n\n"
"*** Final LLVM Code input to ISel ***\n",
@@ -300,13 +317,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createVerifierPass());
// Standard Lower-Level Passes.
-
+
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo());
PM.add(MMI);
OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
-
// Set up a MachineFunction for the rest of CodeGen to work on.
PM.add(new MachineFunctionAnalysis(*this, OptLevel));
@@ -321,44 +337,43 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
return true;
// Print the instruction selected machine code...
- printAndVerify(PM, "After Instruction Selection",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After Instruction Selection");
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
if (OptLevel != CodeGenOpt::None)
PM.add(createOptimizePHIsPass());
+ // If the target requests it, assign local variables to stack slots relative
+ // to one another and simplify frame index references where possible.
+ PM.add(createLocalStackSlotAllocationPass());
+
if (OptLevel != CodeGenOpt::None) {
// With optimization, dead code should already be eliminated. However
// there is one known exception: lowered code for arguments that are only
// used by tail calls, where the tail calls reuse the incoming stack
// arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
PM.add(createDeadMachineInstructionElimPass());
- printAndVerify(PM, "After codegen DCE pass",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After codegen DCE pass");
- PM.add(createOptimizeExtsPass());
+ PM.add(createPeepholeOptimizerPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
PM.add(createMachineCSEPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
- printAndVerify(PM, "After Machine LICM, CSE and Sinking passes",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
}
// Pre-ra tail duplication.
if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
PM.add(createTailDuplicatePass(true));
- printAndVerify(PM, "After Pre-RegAlloc TailDuplicate",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
}
// Run pre-ra passes.
if (addPreRegAlloc(PM, OptLevel))
- printAndVerify(PM, "After PreRegAlloc passes",
- /* allowDoubleDefs= */ true);
+ printAndVerify(PM, "After PreRegAlloc passes");
// Perform register allocation.
PM.add(createRegisterAllocator(OptLevel));
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index ad5728458062f..59f380ad26414 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -166,6 +166,56 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
return I != begin() && (--I)->end > Start;
}
+
+/// ValNo is dead, remove it. If it is the largest value number, just nuke it
+/// (and any other deleted values neighboring it), otherwise mark it as ~1U so
+/// it can be nuked later.
+void LiveInterval::markValNoForDeletion(VNInfo *ValNo) {
+ if (ValNo->id == getNumValNums()-1) {
+ do {
+ valnos.pop_back();
+ } while (!valnos.empty() && valnos.back()->isUnused());
+ } else {
+ ValNo->setIsUnused(true);
+ }
+}
+
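// Editor's note (hedged): the pop-vs-mark split above preserves the invariant
// that a value number's id equals its index in the valnos array, i.e.
//
//   assert(LI.getValNumInfo(i)->id == i);
//
// so only trailing entries can be popped; interior ones are marked unused and
// reclaimed later (e.g. by RenumberValues below).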
+/// RenumberValues - Renumber all values in order of appearance and delete the
+/// remaining unused values.
+void LiveInterval::RenumberValues(LiveIntervals &lis) {
+ SmallPtrSet<VNInfo*, 8> Seen;
+ bool seenPHIDef = false;
+ valnos.clear();
+ for (const_iterator I = begin(), E = end(); I != E; ++I) {
+ VNInfo *VNI = I->valno;
+ if (!Seen.insert(VNI))
+ continue;
+ assert(!VNI->isUnused() && "Unused valno used by live range");
+ VNI->id = (unsigned)valnos.size();
+ valnos.push_back(VNI);
+ VNI->setHasPHIKill(false);
+ if (VNI->isPHIDef())
+ seenPHIDef = true;
+ }
+
+ // Recompute phi kill flags.
+ if (!seenPHIDef)
+ return;
+ for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (!VNI->isPHIDef())
+ continue;
+ const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def);
+ assert(PHIBB && "No basic block for phi-def");
+ for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(),
+ PE = PHIBB->pred_end(); PI != PE; ++PI) {
+ VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot());
+ if (KVNI)
+ KVNI->setHasPHIKill(true);
+ }
+ }
+}
+
/// extendIntervalEndTo - This method is used when we want to extend the range
/// specified by I to end at the specified endpoint. To do this, we should
/// merge and eliminate all ranges that this will overlap with. The iterator is
@@ -175,7 +225,7 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
VNInfo *ValNo = I->valno;
// Search for the first interval that we can't merge with.
- Ranges::iterator MergeTo = next(I);
+ Ranges::iterator MergeTo = llvm::next(I);
for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
}
@@ -184,11 +234,11 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
I->end = std::max(NewEnd, prior(MergeTo)->end);
// Erase any dead ranges.
- ranges.erase(next(I), MergeTo);
+ ranges.erase(llvm::next(I), MergeTo);
// If the newly formed range now touches the range after it and if they have
// the same value number, merge the two ranges into one range.
- Ranges::iterator Next = next(I);
+ Ranges::iterator Next = llvm::next(I);
if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
I->end = Next->end;
ranges.erase(Next);
@@ -227,7 +277,7 @@ LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
MergeTo->end = I->end;
}
- ranges.erase(next(MergeTo), next(I));
+ ranges.erase(llvm::next(MergeTo), llvm::next(I));
return MergeTo;
}
@@ -280,7 +330,7 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
return ranges.insert(it, LR);
}
-/// isInOneLiveRange - Return true if the range specified is entirely in
+/// isInOneLiveRange - Return true if the range specified is entirely in
/// a single LiveRange of the live interval.
bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) {
Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
@@ -314,16 +364,8 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
break;
}
if (isDead) {
- // Now that ValNo is dead, remove it. If it is the largest value
- // number, just nuke it (and any other deleted values neighboring it),
- // otherwise mark it as ~1U so it can be nuked later.
- if (ValNo->id == getNumValNums()-1) {
- do {
- valnos.pop_back();
- } while (!valnos.empty() && valnos.back()->isUnused());
- } else {
- ValNo->setIsUnused(true);
- }
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
}
}
@@ -345,7 +387,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
I->end = Start; // Trim the old interval.
// Insert the new one.
- ranges.insert(next(I), LiveRange(End, OldEnd, ValNo));
+ ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
}
/// removeValNo - Remove all the ranges defined by the specified value#.
@@ -359,21 +401,13 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
if (I->valno == ValNo)
ranges.erase(I);
} while (I != E);
- // Now that ValNo is dead, remove it. If it is the largest value
- // number, just nuke it (and any other deleted values neighboring it),
- // otherwise mark it as ~1U so it can be nuked later.
- if (ValNo->id == getNumValNums()-1) {
- do {
- valnos.pop_back();
- } while (!valnos.empty() && valnos.back()->isUnused());
- } else {
- ValNo->setIsUnused(true);
- }
+ // Now that ValNo is dead, remove it.
+ markValNoForDeletion(ValNo);
}
/// getLiveRangeContaining - Return the live range that contains the
/// specified index, or null if there is none.
-LiveInterval::const_iterator
+LiveInterval::const_iterator
LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
const_iterator It = std::upper_bound(begin(), end(), Idx);
if (It != ranges.begin()) {
@@ -385,7 +419,7 @@ LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
return end();
}
-LiveInterval::iterator
+LiveInterval::iterator
LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
iterator It = std::upper_bound(begin(), end(), Idx);
if (It != begin()) {
@@ -393,7 +427,7 @@ LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
if (It->contains(Idx))
return It;
}
-
+
return end();
}
@@ -425,11 +459,11 @@ VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
/// the intervals are not joinable, this aborts.
void LiveInterval::join(LiveInterval &Other,
const int *LHSValNoAssignments,
- const int *RHSValNoAssignments,
+ const int *RHSValNoAssignments,
SmallVector<VNInfo*, 16> &NewVNInfo,
MachineRegisterInfo *MRI) {
// Determine if any of our live range values are mapped. This is uncommon, so
- // we want to avoid the interval scan if not.
+ // we want to avoid the interval scan if not.
bool MustMapCurValNos = false;
unsigned NumVals = getNumValNums();
unsigned NumNewVals = NewVNInfo.size();
@@ -449,7 +483,7 @@ void LiveInterval::join(LiveInterval &Other,
++OutIt;
for (iterator I = OutIt, E = end(); I != E; ++I) {
OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
-
+
// If this live range has the same value # as its immediate predecessor,
// and if they are neighbors, remove one LiveRange. This happens when we
// have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
@@ -460,12 +494,12 @@ void LiveInterval::join(LiveInterval &Other,
OutIt->start = I->start;
OutIt->end = I->end;
}
-
+
// Didn't merge, on to the next one.
++OutIt;
}
}
-
+
// If we merge some live ranges, chop off the end.
ranges.erase(OutIt, end());
}
@@ -483,7 +517,7 @@ void LiveInterval::join(LiveInterval &Other,
if (VNI) {
if (NumValNos >= NumVals)
valnos.push_back(VNI);
- else
+ else
valnos[NumValNos] = VNI;
VNI->id = NumValNos++; // Renumber val#.
}
@@ -502,25 +536,13 @@ void LiveInterval::join(LiveInterval &Other,
}
ComputeJoinedWeight(Other);
-
- // Update regalloc hint if currently there isn't one.
- if (TargetRegisterInfo::isVirtualRegister(reg) &&
- TargetRegisterInfo::isVirtualRegister(Other.reg)) {
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(reg);
- if (Hint.first == 0 && Hint.second == 0) {
- std::pair<unsigned, unsigned> OtherHint =
- MRI->getRegAllocationHint(Other.reg);
- if (OtherHint.first || OtherHint.second)
- MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second);
- }
- }
}
/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
/// interval as the specified value number. The LiveRanges in RHS are
/// allowed to overlap with LiveRanges in the current interval, but only if
/// the overlapping LiveRanges have the specified value number.
-void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
VNInfo *LHSValNo) {
// TODO: Make this more efficient.
iterator InsertPos = begin();
@@ -569,7 +591,7 @@ void LiveInterval::MergeValueInAsValue(
// If this trimmed away the whole range, ignore it.
if (Start == End) continue;
}
-
+
// Map the valno in the other live range to the current live range.
IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
}
@@ -584,18 +606,10 @@ void LiveInterval::MergeValueInAsValue(
if (I->valno == V1) {
isDead = false;
break;
- }
- if (isDead) {
- // Now that V1 is dead, remove it. If it is the largest value number,
- // just nuke it (and any other deleted values neighboring it), otherwise
- // mark it as ~1U so it can be nuked later.
- if (V1->id == getNumValNums()-1) {
- do {
- valnos.pop_back();
- } while (!valnos.empty() && valnos.back()->isUnused());
- } else {
- V1->setIsUnused(true);
}
+ if (isDead) {
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
}
}
}
@@ -609,7 +623,7 @@ void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
const LiveInterval &Clobbers,
VNInfo::Allocator &VNInfoAllocator) {
if (Clobbers.empty()) return;
-
+
DenseMap<VNInfo*, VNInfo*> ValNoMaps;
VNInfo *UnusedValNo = 0;
iterator IP = begin();
@@ -679,10 +693,10 @@ void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
// for unknown values, use it.
VNInfo *ClobberValNo =
getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
-
+
iterator IP = begin();
IP = std::upper_bound(IP, end(), Start);
-
+
// If the start of this range overlaps with an existing liverange, trim it.
if (IP != begin() && IP[-1].end > Start) {
Start = IP[-1].end;
@@ -695,7 +709,7 @@ void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
// If this trimmed away the whole range, ignore it.
if (Start == End) return;
}
-
+
// Insert the clobber interval.
addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
}
@@ -722,7 +736,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
for (iterator I = begin(); I != end(); ) {
iterator LR = I++;
if (LR->valno != V1) continue; // Not a V1 LiveRange.
-
+
// Okay, we found a V1 live range. If it had a previous, touching, V2 live
// range, extend it.
if (LR != begin()) {
@@ -736,11 +750,11 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
LR = Prev;
}
}
-
+
// Okay, now we have a V1 or V2 live range that is maximally merged forward.
// Ensure that it is a V2 live-range.
LR->valno = V2;
-
+
// If we can merge it into later V2 live ranges, do so now. We ignore any
// following V1 live ranges, as they will be merged in subsequent iterations
// of the loop.
@@ -752,18 +766,10 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
}
}
}
-
- // Now that V1 is dead, remove it. If it is the largest value number, just
- // nuke it (and any other deleted values neighboring it), otherwise mark it as
- // ~1U so it can be nuked later.
- if (V1->id == getNumValNums()-1) {
- do {
- valnos.pop_back();
- } while (valnos.back()->isUnused());
- } else {
- V1->setIsUnused(true);
- }
-
+
+ // Now that V1 is dead, remove it.
+ markValNoForDeletion(V1);
+
return V2;
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 194d03d8dbfb5..2726fc337539c 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -47,7 +47,7 @@
using namespace llvm;
// Hidden options for help debugging.
-static cl::opt<bool> DisableReMat("disable-rematerialization",
+static cl::opt<bool> DisableReMat("disable-rematerialization",
cl::init(false), cl::Hidden);
STATISTIC(numIntervals , "Number of original intervals");
@@ -55,22 +55,24 @@ STATISTIC(numFolds , "Number of loads/stores folded into instructions");
STATISTIC(numSplits , "Number of intervals split");
char LiveIntervals::ID = 0;
-static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis");
+INITIALIZE_PASS(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false);
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<AliasAnalysis>();
- AU.addPreserved<LiveVariables>();
AU.addRequired<LiveVariables>();
- AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreserved<LiveVariables>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
-
+
if (!StrongPHIElim) {
AU.addPreservedID(PHIEliminationID);
AU.addRequiredID(PHIEliminationID);
}
-
+
AU.addRequiredID(TwoAddressInstructionPassID);
AU.addPreserved<ProcessImplicitDefs>();
AU.addRequired<ProcessImplicitDefs>();
@@ -84,7 +86,7 @@ void LiveIntervals::releaseMemory() {
for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
E = r2iMap_.end(); I != E; ++I)
delete I->second;
-
+
r2iMap_.clear();
// Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
@@ -188,10 +190,6 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
const MachineInstr &MI = *I;
// Allow copies to and from li.reg
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
- if (SrcReg == li.reg || DstReg == li.reg)
- continue;
if (MI.isCopy())
if (MI.getOperand(0).getReg() == li.reg ||
MI.getOperand(1).getReg() == li.reg)
@@ -278,7 +276,7 @@ bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
/// isPartialRedef - Return true if the specified def at the specific index is
/// partially re-defining the specified live interval. A common case of this is
-/// a definition of the sub-register.
+/// a definition of the sub-register.
bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
LiveInterval &interval) {
if (!MO.getSubReg() || MO.isEarlyClobber())
@@ -324,9 +322,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
mi->addRegisterDefined(interval.reg);
MachineInstr *CopyMI = NULL;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (mi->isCopyLike() ||
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
+ if (mi->isCopyLike()) {
CopyMI = mi;
}
@@ -420,8 +416,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// def-and-use register operand.
// It may also be partial redef like this:
- // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
- // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
+ // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
+ // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
bool PartReDef = isPartialRedef(MIIdx, MO, interval);
if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
// If this is a two-address definition, then we have already processed
@@ -454,11 +450,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
OldValNo->setCopy(0);
// A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (PartReDef && (mi->isCopyLike() ||
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)))
+ if (PartReDef && mi->isCopyLike())
OldValNo->setCopy(&*mi);
-
+
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
DEBUG(dbgs() << " replace range with " << LR);
@@ -485,12 +479,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
VNInfo *ValNo;
MachineInstr *CopyMI = NULL;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (mi->isCopyLike() ||
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (mi->isCopyLike())
CopyMI = mi;
ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
-
+
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
@@ -567,10 +559,10 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
goto exit;
}
}
-
+
baseIndex = baseIndex.getNextIndex();
}
-
+
// The only case we should have a dead physreg here without a killing
// instruction (where we know it's dead) is if it is live-in to the function
// and never used. Another possible case is the implicit use of the
@@ -602,9 +594,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
getOrCreateInterval(MO.getReg()));
else if (allocatableRegs_[MO.getReg()]) {
MachineInstr *CopyMI = NULL;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (MI->isCopyLike() ||
- tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (MI->isCopyLike())
CopyMI = MI;
handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
getOrCreateInterval(MO.getReg()), CopyMI);
@@ -696,7 +686,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
/// registers. For some ordering of the machine instructions [1,N], a
/// live interval is an interval [i, j) where 1 <= i <= j < N for
/// which a variable is live.
-void LiveIntervals::computeIntervals() {
+void LiveIntervals::computeIntervals() {
DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
@@ -723,11 +713,11 @@ void LiveIntervals::computeIntervals() {
handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
true);
}
-
+
// Skip over empty initial indices.
if (getInstructionFromIndex(MIIndex) == 0)
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
-
+
for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
MI != miEnd; ++MI) {
DEBUG(dbgs() << MIIndex << "\t" << *MI);
@@ -746,7 +736,7 @@ void LiveIntervals::computeIntervals() {
else if (MO.isUndef())
UndefUses.push_back(MO.getReg());
}
-
+
// Move to the next instr slot.
MIIndex = indexes_->getNextNonNullIndex(MIIndex);
}
@@ -791,7 +781,7 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
unsigned Reg = MO.getReg();
if (Reg == 0 || Reg == li.reg)
continue;
-
+
if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
!allocatableRegs_[Reg])
continue;
@@ -810,7 +800,7 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
/// which reaches the given instruction also reaches the specified use index.
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
SlotIndex UseIdx) const {
- SlotIndex Index = getInstructionIndex(MI);
+ SlotIndex Index = getInstructionIndex(MI);
VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
return UI != li.end() && UI->valno == ValNo;
@@ -915,7 +905,7 @@ static bool FilterFoldedOps(MachineInstr *MI,
}
return false;
}
-
+
/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
/// slot / to reg or any rematerialized load into the ith operand of specified
@@ -1035,7 +1025,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
bool LiveIntervals::
rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
- bool TrySplit, SlotIndex index, SlotIndex end,
+ bool TrySplit, SlotIndex index, SlotIndex end,
MachineInstr *MI,
MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
unsigned Slot, int LdSlot,
@@ -1094,7 +1084,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
// keep the src/dst regs pinned.
//
// Keep track of whether we replace a use and/or def so that we can
- // create the spill interval with the appropriate range.
+ // create the spill interval with the appropriate range.
SmallVector<unsigned, 2> Ops;
tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
@@ -1156,7 +1146,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (mopj.isImplicit())
rewriteImplicitOps(li, MI, NewVReg, vrm);
}
-
+
if (CreatedNewVReg) {
if (DefIsReMat) {
vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
@@ -1696,7 +1686,7 @@ addIntervalsForSpills(const LiveInterval &li,
if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
Slot = vrm.assignVirt2StackSlot(li.reg);
-
+
// This case only occurs when the prealloc splitter has already assigned
// a stack slot to this vreg.
else
@@ -1753,7 +1743,7 @@ addIntervalsForSpills(const LiveInterval &li,
Ops.push_back(j);
if (MO.isDef())
continue;
- if (isReMat ||
+ if (isReMat ||
(!FoundUse && !alsoFoldARestore(Id, index, VReg,
RestoreMBBs, RestoreIdxes))) {
// MI has two-address uses of the same register. If the use
@@ -1866,7 +1856,6 @@ addIntervalsForSpills(const LiveInterval &li,
for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
LiveInterval *LI = NewLIs[i];
if (!LI->empty()) {
- LI->weight /= SlotIndex::NUM * getApproximateInstructionCount(*LI);
if (!AddedKill.count(LI)) {
LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
SlotIndex LastUseIdx = LR->end.getBaseIndex();
@@ -1899,7 +1888,7 @@ bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
/// getRepresentativeReg - Find the largest super register of the specified
/// physical register.
unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
- // Find the largest super-register that is allocatable.
+ // Find the largest super-register that is allocatable.
unsigned BestReg = Reg;
for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
unsigned SuperReg = *AS;
@@ -2013,7 +2002,7 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
getMBBEndIdx(startInst->getParent()), VN);
Interval.addRange(LR);
-
+
return LR;
}
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 709e2c6d5ca7e..b5c385f772394 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -25,7 +25,8 @@
using namespace llvm;
char LiveStacks::ID = 0;
-static RegisterPass<LiveStacks> X("livestacks", "Live Stack Slot Analysis");
+INITIALIZE_PASS(LiveStacks, "livestacks",
+ "Live Stack Slot Analysis", false, false);
void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 41b891d30f23b..375307b973a97 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -42,7 +42,8 @@
using namespace llvm;
char LiveVariables::ID = 0;
-static RegisterPass<LiveVariables> X("livevars", "Live Variable Analysis");
+INITIALIZE_PASS(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false);
void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -482,21 +483,6 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
}
}
-namespace {
- struct RegSorter {
- const TargetRegisterInfo *TRI;
-
- RegSorter(const TargetRegisterInfo *tri) : TRI(tri) { }
- bool operator()(unsigned A, unsigned B) {
- if (TRI->isSubRegister(A, B))
- return true;
- else if (TRI->isSubRegister(B, A))
- return false;
- return A < B;
- }
- };
-}
-
bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
MRI = &mf.getRegInfo();
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 0000000000000..7e366f0ceec02
--- /dev/null
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,354 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameInfo.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame indices references replaced");
+
+namespace {
+ class FrameRef {
+ MachineBasicBlock::iterator MI; // Instr referencing the frame
+ int64_t LocalOffset; // Local offset of the frame idx referenced
+ public:
+ FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
+ MI(I), LocalOffset(Offset) {}
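+ // Order by the local offset referenced, so that after sorting, frame
+ // references likely to share a base register end up adjacent.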
+ bool operator<(const FrameRef &RHS) const {
+ return LocalOffset < RHS.LocalOffset;
+ }
+ MachineBasicBlock::iterator getMachineInstr() { return MI; }
+ };
+
+ class LocalStackSlotPass: public MachineFunctionPass {
+ SmallVector<int64_t,16> LocalOffsets;
+
+ void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown, unsigned &MaxAlign);
+ void calculateFrameObjectOffsets(MachineFunction &Fn);
+ bool insertFrameReferenceRegisters(MachineFunction &Fn);
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
+ bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ const char *getPassName() const {
+ return "Local Stack Slot Allocation";
+ }
+
+ };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+
+FunctionPass *llvm::createLocalStackSlotAllocationPass() {
+ return new LocalStackSlotPass();
+}
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+
+ // If the target doesn't want/need this pass, or if there are no locals
+ // to consider, early exit.
+ if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+ return true;
+
+ // Make sure we have enough space to store the local offsets.
+ LocalOffsets.resize(MFI->getObjectIndexEnd());
+
+ // Lay out the local blob.
+ calculateFrameObjectOffsets(MF);
+
+ // Insert virtual base registers to resolve frame index references.
+ bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+ // Tell MFI whether any base registers were allocated. PEI will only
+ // want to use the local block allocations from this pass if there were any.
+ // Otherwise, PEI can do a somewhat better job of getting the alignment right
+ // without a hole at the start since it knows the alignment of the stack
+ // at the start of local allocation, and this pass doesn't.
+ MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+ return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+ int FrameIdx, int64_t &Offset,
+ bool StackGrowsDown,
+ unsigned &MaxAlign) {
+ // If the stack grows down, add the object size to find the lowest address.
+ if (StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+ // If the alignment of this object is greater than that of the stack, then
+ // increase the stack alignment to match.
+ MaxAlign = std::max(MaxAlign, Align);
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
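+ // For example, an Offset of 13 with an Align of 8 rounds up to
+ // (13 + 7) / 8 * 8 == 16.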
+
+ int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+ DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+ << LocalOffset << "\n");
+ // Keep the offset available for base register allocation
+ LocalOffsets[FrameIdx] = LocalOffset;
+ // And tell MFI about it for PEI to use later
+ MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+
+ if (!StackGrowsDown)
+ Offset += MFI->getObjectSize(FrameIdx);
+
+ ++NumAllocations;
+}
+
+/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the
+/// abstract stack objects.
+///
+void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
+ // Loop over all of the stack objects, assigning sequential addresses...
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ int64_t Offset = 0;
+ unsigned MaxAlign = 0;
+
+ // Make sure that the stack protector comes before the local variables on the
+ // stack.
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
+ AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ StackGrowsDown, MaxAlign);
+
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
+ // Then assign frame offsets to stack objects that are not used to spill
+ // callee saved registers.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (LargeStackObjs.count(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign);
+ }
+
+ // Remember how big this blob of stack space is
+ MFI->setLocalFrameSize(Offset);
+ MFI->setLocalFrameMaxAlign(MaxAlign);
+}
+
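+/// lookupCandidateBaseReg - Scan the base registers defined so far and return
+/// (via RegOffset) one that leaves the frame reference within range of the
+/// instruction, if such a register exists.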
+static inline bool
+lookupCandidateBaseReg(const SmallVector<std::pair<unsigned, int64_t>, 8> &Regs,
+ std::pair<unsigned, int64_t> &RegOffset,
+ int64_t FrameSizeAdjust,
+ int64_t LocalFrameOffset,
+ const MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ unsigned e = Regs.size();
+ for (unsigned i = 0; i < e; ++i) {
+ RegOffset = Regs[i];
+ // Check whether the offset from where the base register points to the
+ // target address is within range for the instruction.
+ int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second;
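+ // E.g. with FrameSizeAdjust == 0, a base register at block offset 64 and a
+ // local at offset 72 yield a residual Offset of 8 to test for legality.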
+ if (TRI->isFrameOffsetLegal(MI, Offset))
+ return true;
+ }
+ return false;
+}
+
+bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
+ // Scan the function's instructions looking for frame index references.
+ // For each, ask the target if it wants a virtual base register for it
+ // based on what we can tell it about where the local will end up in the
+ // stack frame. If it wants one, re-use a suitable one we've previously
+ // allocated, or if there isn't one that fits the bill, allocate a new one
+ // and ask the target to create a defining instruction for it.
+ bool UsedBaseReg = false;
+
+ MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ bool StackGrowsDown =
+ TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin();
+
+ // Collect all of the instructions in the function that reference
+ // a frame index, together with the local offset of the object referenced,
+ // to ease later lookup. (For any insn that has more than one FI reference,
+ // we arbitrarily choose the first one.)
+ SmallVector<FrameRef, 64> FrameReferenceInsns;
+ // A base register definition is a register+offset pair.
+ SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+
+
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ MachineInstr *MI = I;
+ // Debug value instructions can't be out of range, so they don't need
+ // any updates.
+ if (MI->isDebugValue())
+ continue;
+ // For now, allocate the base register(s) within the basic block
+ // where they're used, and don't try to keep them around outside
+ // of that. It may be beneficial to try sharing them more broadly
+ // than that, but the increased register pressure makes that a
+ // tricky thing to balance. Investigate if re-materializing these
+ // becomes an issue.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(i).isFI()) {
+ // Don't try this with values not in the local block.
+ if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex()))
+ break;
+ FrameReferenceInsns.
+ push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()]));
+ break;
+ }
+ }
+ }
+ }
+ // Sort the frame references by local offset
+ array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+
+
+ // Loop through the frame references and allocate base registers as necessary.
+ for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
+ MachineBasicBlock::iterator I =
+ FrameReferenceInsns[ref].getMachineInstr();
+ MachineInstr *MI = I;
+ for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) {
+ // Consider replacing all frame index operands that reference
+ // an object allocated in the local block.
+ if (MI->getOperand(idx).isFI()) {
+ int FrameIdx = MI->getOperand(idx).getIndex();
+
+ assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ "Only pre-allocated locals expected!");
+
+ DEBUG(dbgs() << "Considering: " << *MI);
+ if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) {
+ unsigned BaseReg = 0;
+ int64_t Offset = 0;
+ int64_t FrameSizeAdjust =
+ StackGrowsDown ? MFI->getLocalFrameSize() : 0;
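+ // Local offsets are negative when the stack grows down, so biasing by
+ // the local block size gives an offset from the low end of the block.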
+
+ DEBUG(dbgs() << " Replacing FI in: " << *MI);
+
+ // If we have a suitable base register available, use it; otherwise
+ // create a new one. Note that any offset encoded in the
+ // instruction itself will be taken into account by the target,
+ // so we don't have to adjust for it here when reusing a base
+ // register.
+ std::pair<unsigned, int64_t> RegOffset;
+ if (lookupCandidateBaseReg(BaseRegisters, RegOffset,
+ FrameSizeAdjust,
+ LocalOffsets[FrameIdx],
+ MI, TRI)) {
+ DEBUG(dbgs() << " Reusing base register " <<
+ RegOffset.first << "\n");
+ // We found a register to reuse.
+ BaseReg = RegOffset.first;
+ Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] -
+ RegOffset.second;
+ } else {
+ // No previously defined register was in range, so create a
+ // new one.
+ int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx);
+ const TargetRegisterClass *RC = TRI->getPointerRegClass();
+ BaseReg = Fn.getRegInfo().createVirtualRegister(RC);
+
+ DEBUG(dbgs() << " Materializing base register " << BaseReg <<
+ " at frame local offset " <<
+ LocalOffsets[FrameIdx] + InstrOffset << "\n");
+ // Tell the target to insert the instruction to initialize
+ // the base register.
+ TRI->materializeFrameBaseRegister(InsertionPt, BaseReg,
+ FrameIdx, InstrOffset);
+
+ // The base register already includes any offset specified
+ // by the instruction, so account for that so it doesn't get
+ // applied twice.
+ Offset = -InstrOffset;
+
+ int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] +
+ InstrOffset;
+ BaseRegisters.push_back(
+ std::pair<unsigned, int64_t>(BaseReg, BaseOffset));
+ ++NumBaseRegisters;
+ UsedBaseReg = true;
+ }
+ assert(BaseReg != 0 && "Unable to allocate virtual base register!");
+
+ // Modify the instruction to use the new base register rather
+ // than the frame index operand.
+ TRI->resolveFrameIndex(I, BaseReg, Offset);
+ DEBUG(dbgs() << "Resolved: " << *MI);
+
+ ++NumReplacements;
+ }
+ }
+ }
+ }
+ return UsedBaseReg;
+}
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index dfd4eaeca6603..ad1c537c1911a 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -36,7 +36,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- LowerSubregsInstructionPass() : MachineFunctionPass(&ID) {}
+ LowerSubregsInstructionPass() : MachineFunctionPass(ID) {}
const char *getPassName() const {
return "Subregister lowering instruction pass";
@@ -58,9 +58,6 @@ namespace {
void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
const TargetRegisterInfo *TRI);
- void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
- const TargetRegisterInfo *TRI,
- bool AddIfNotFound = false);
void TransferImplicitDefs(MachineInstr *MI);
};
@@ -87,23 +84,6 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
}
}
-/// TransferKillFlag - MI is a pseudo-instruction with SrcReg killed,
-/// and the lowered replacement instructions immediately precede it.
-/// Mark the replacement instructions with the kill flag.
-void
-LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
- unsigned SrcReg,
- const TargetRegisterInfo *TRI,
- bool AddIfNotFound) {
- for (MachineBasicBlock::iterator MII =
- prior(MachineBasicBlock::iterator(MI)); ; --MII) {
- if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound))
- break;
- assert(MII != MI->getParent()->begin() &&
- "copyPhysReg output doesn't reference source register!");
- }
-}
-
/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
/// replacement instructions immediately precede it. Copy any implicit-def
/// operands from MI to the replacement instruction.
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index a27ee479433be..50f3f672dcedc 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -441,7 +441,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
- DEBUG(dbgs() << "PHIElimination splitting critical edge:"
+ DEBUG(dbgs() << "Splitting critical edge:"
" BB#" << getNumber()
<< " -- BB#" << NMBB->getNumber()
<< " -- BB#" << Succ->getNumber() << '\n');
@@ -468,11 +468,33 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
LV->addNewBlock(NMBB, this, Succ);
if (MachineDominatorTree *MDT =
- P->getAnalysisIfAvailable<MachineDominatorTree>())
- MDT->addNewBlock(NMBB, this);
+ P->getAnalysisIfAvailable<MachineDominatorTree>()) {
+ // Update dominator information.
+ MachineDomTreeNode *SuccDTNode = MDT->getNode(Succ);
+
+ bool IsNewIDom = true;
+ for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end();
+ PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ if (PredBB == NMBB)
+ continue;
+ if (!MDT->dominates(SuccDTNode, MDT->getNode(PredBB))) {
+ IsNewIDom = false;
+ break;
+ }
+ }
+
+ // We know "this" dominates the newly created basic block.
+ MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this);
+
+ // If all the other predecessors of "Succ" are dominated by "Succ" itself
+ // then the new block is the new immediate dominator of "Succ". Otherwise,
+ // the new block doesn't dominate anything.
+ if (IsNewIDom)
+ MDT->changeImmediateDominator(SuccDTNode, NewDTNode);
+ }
- if (MachineLoopInfo *MLI =
- P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>())
if (MachineLoop *TIL = MLI->getLoopFor(this)) {
// If one or the other blocks were not in a loop, the new block is not
// either, and thus LI doesn't need to be updated.
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 833cc00027db5..92e2299ec62fa 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -41,7 +41,7 @@ namespace {
MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification
- MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {}
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -49,6 +49,7 @@ namespace {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AliasAnalysis>();
+ AU.addPreservedID(MachineLoopInfoID);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
}
@@ -85,8 +86,8 @@ namespace {
} // end anonymous namespace
char MachineCSE::ID = 0;
-static RegisterPass<MachineCSE>
-X("machine-cse", "Machine Common Subexpression Elimination");
+INITIALIZE_PASS(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false);
FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
@@ -107,29 +108,9 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (DefMI->getParent() != MBB)
continue;
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- !SrcSubIdx && !DstSubIdx) {
- const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
- if (!NewRC)
- continue;
- DEBUG(dbgs() << "Coalescing: " << *DefMI);
- DEBUG(dbgs() << "*** to: " << *MI);
- MO.setReg(SrcReg);
- MRI->clearKillFlags(SrcReg);
- if (NewRC != SRC)
- MRI->setRegClass(SrcReg, NewRC);
- DefMI->eraseFromParent();
- ++NumCoalesces;
- Changed = true;
- }
-
if (!DefMI->isCopy())
continue;
- SrcReg = DefMI->getOperand(1).getReg();
+ unsigned SrcReg = DefMI->getOperand(1).getReg();
if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
continue;
if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
@@ -261,19 +242,13 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
return false;
}
-static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- return MI->isCopyLike() ||
- TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
-}
-
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
MI->isKill() || MI->isInlineAsm() || MI->isDebugValue())
return false;
// Ignore copies.
- if (isCopy(MI, TII))
+ if (MI->isCopyLike())
return false;
// Ignore stuff that we obviously can't move.
@@ -329,7 +304,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
E = MRI->use_nodbg_end(); I != E; ++I) {
MachineInstr *Use = &*I;
// Ignore copies.
- if (!isCopy(Use, TII)) {
+ if (!Use->isCopyLike()) {
HasNonCopyUse = true;
break;
}
@@ -385,7 +360,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Look for trivial copy coalescing opportunities.
if (PerformTrivialCoalescing(MI, MBB)) {
// After coalescing MI itself may become a copy.
- if (isCopy(MI, TII))
+ if (MI->isCopyLike())
continue;
FoundCSE = VNT.count(MI);
}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index b5f8fbba99deb..3c674789244a2 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -24,10 +24,10 @@ TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
char MachineDominatorTree::ID = 0;
-static RegisterPass<MachineDominatorTree>
-E("machinedomtree", "MachineDominator Tree Construction", true);
+INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
+ "MachineDominator Tree Construction", true, true);
-const PassInfo *const llvm::MachineDominatorsID = &E;
+char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -41,7 +41,7 @@ bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
}
MachineDominatorTree::MachineDominatorTree()
- : MachineFunctionPass(&ID) {
+ : MachineFunctionPass(ID) {
DT = new DominatorTreeBase<MachineBasicBlock>(false);
}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 666120f032c60..017170076cebb 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -397,7 +397,6 @@ void MachineFunction::viewCFGOnly() const
/// create a corresponding virtual register for it.
unsigned MachineFunction::addLiveIn(unsigned PReg,
const TargetRegisterClass *RC) {
- assert(RC->contains(PReg) && "Not the correct regclass!");
MachineRegisterInfo &MRI = getRegInfo();
unsigned VReg = MRI.getLiveInVirtReg(PReg);
if (VReg) {
@@ -447,7 +446,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
unsigned StackAlign = TFI.getStackAlignment();
unsigned Align = MinAlign(SPOffset, StackAlign);
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
- /*isSS*/false));
+ /*isSS*/false, false));
return -++NumFixedObjects;
}
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 07a0f45c0f481..4f84b952e0612 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -20,14 +20,14 @@ using namespace llvm;
// a default constructor.
static PassInfo
X("Machine Function Analysis", "machine-function-analysis",
- intptr_t(&MachineFunctionAnalysis::ID), 0,
+ &MachineFunctionAnalysis::ID, 0,
/*CFGOnly=*/false, /*is_analysis=*/true);
char MachineFunctionAnalysis::ID = 0;
MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
CodeGenOpt::Level OL) :
- FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) {
+ FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
}
MachineFunctionAnalysis::~MachineFunctionAnalysis() {
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 547c4febc8dae..2aaa798a02c19 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -29,7 +29,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
const std::string Banner;
MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
- : MachineFunctionPass(&ID), OS(os), Banner(banner) {}
+ : MachineFunctionPass(ID), OS(os), Banner(banner) {}
const char *getPassName() const { return "MachineFunction Printer"; }
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 6b2e98549c718..446e461d5460f 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1236,12 +1236,18 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
const MachineFunction *MF = 0;
+ const MachineRegisterInfo *MRI = 0;
if (const MachineBasicBlock *MBB = getParent()) {
MF = MBB->getParent();
if (!TM && MF)
TM = &MF->getTarget();
+ if (MF)
+ MRI = &MF->getRegInfo();
}
+ // Save a list of virtual registers.
+ SmallVector<unsigned, 8> VirtRegs;
+
// Print explicitly defined operands on the left of an assignment syntax.
unsigned StartOp = 0, e = getNumOperands();
for (; StartOp < e && getOperand(StartOp).isReg() &&
@@ -1250,6 +1256,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
++StartOp) {
if (StartOp != 0) OS << ", ";
getOperand(StartOp).print(OS, TM);
+ unsigned Reg = getOperand(StartOp).getReg();
+ if (Reg && TargetRegisterInfo::isVirtualRegister(Reg))
+ VirtRegs.push_back(Reg);
}
if (StartOp != 0)
@@ -1264,6 +1273,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.getReg() &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ VirtRegs.push_back(MO.getReg());
+
// Omit call-clobbered registers which aren't used anywhere. This makes
// call instructions much less noisy on targets where calls clobber lots
// of registers. Don't rely on MO.isDead() because we may be called before
@@ -1325,11 +1338,29 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
for (mmo_iterator i = memoperands_begin(), e = memoperands_end();
i != e; ++i) {
OS << **i;
- if (next(i) != e)
+ if (llvm::next(i) != e)
OS << " ";
}
}
+ // Print the regclass of any virtual registers encountered.
+ if (MRI && !VirtRegs.empty()) {
+ if (!HaveSemi) OS << ";"; HaveSemi = true;
+ for (unsigned i = 0; i != VirtRegs.size(); ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
+ OS << " " << RC->getName() << ":%reg" << VirtRegs[i];
+ for (unsigned j = i+1; j != VirtRegs.size();) {
+ if (MRI->getRegClass(VirtRegs[j]) != RC) {
+ ++j;
+ continue;
+ }
+ if (VirtRegs[i] != VirtRegs[j])
+ OS << "," << VirtRegs[j];
+ VirtRegs.erase(VirtRegs.begin()+j);
+ }
+ }
+ }
+
if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";";
OS << " dbg:";
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 4c054f51f3a8a..1a74b747e9f2a 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -68,16 +68,16 @@ namespace {
BitVector AllocatableSet;
- // For each opcode, keep a list of potentail CSE instructions.
+ // For each opcode, keep a list of potential CSE instructions.
DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
public:
static char ID; // Pass identification, replacement for typeid
MachineLICM() :
- MachineFunctionPass(&ID), PreRegAlloc(true) {}
+ MachineFunctionPass(ID), PreRegAlloc(true) {}
explicit MachineLICM(bool PreRA) :
- MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -189,8 +189,8 @@ namespace {
} // end anonymous namespace
char MachineLICM::ID = 0;
-static RegisterPass<MachineLICM>
-X("machinelicm", "Machine Loop Invariant Code Motion");
+INITIALIZE_PASS(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false);
FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
return new MachineLICM(PreRegAlloc);
@@ -488,9 +488,14 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
MII = NextMII;
}
- const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
- for (unsigned I = 0, E = Children.size(); I != E; ++I)
- HoistRegion(Children[I]);
+ // Don't hoist things out of a large switch statement. This often causes
+ // code to be hoisted that wasn't going to be executed, and increases
+ // register pressure in a situation where it's likely to matter.
+ if (BB->succ_size() < 25) {
+ const std::vector<MachineDomTreeNode*> &Children = N->getChildren();
+ for (unsigned I = 0, E = Children.size(); I != E; ++I)
+ HoistRegion(Children[I]);
+ }
}
/// IsLICMCandidate - Returns true if the instruction may be a suitable
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 269538b31d0b9..bca4b0c28985f 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -30,10 +30,10 @@ TEMPLATE_INSTANTIATION(MLIB);
}
char MachineLoopInfo::ID = 0;
-static RegisterPass<MachineLoopInfo>
-X("machine-loops", "Machine Natural Loop Construction", true);
+INITIALIZE_PASS(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true);
-const PassInfo *const llvm::MachineLoopInfoID = &X;
+char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
releaseMemory();
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 15778b46fe0a3..b647a4dcc5308 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -28,8 +28,8 @@ using namespace llvm;
using namespace llvm::dwarf;
// Handle the Pass registration stuff necessary to use TargetData's.
-static RegisterPass<MachineModuleInfo>
-X("machinemoduleinfo", "Machine Module Information");
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false);
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
@@ -254,7 +254,7 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
//===----------------------------------------------------------------------===//
MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
-: ImmutablePass(&ID), Context(MAI),
+: ImmutablePass(ID), Context(MAI),
ObjFileMMI(0),
CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){
// Always emit some info, by default "no personality" info.
@@ -264,7 +264,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
}
MachineModuleInfo::MachineModuleInfo()
-: ImmutablePass(&ID), Context(*(MCAsmInfo*)0) {
+: ImmutablePass(ID), Context(*(MCAsmInfo*)0) {
assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
"should always be explicitly constructed by LLVMTargetMachine");
abort();
@@ -579,10 +579,3 @@ namespace {
}
};
}
-
-MachineModuleInfo::VariableDbgInfoMapTy &
-MachineModuleInfo::getVariableDbgInfo() {
- std::stable_sort(VariableDbgInfo.begin(), VariableDbgInfo.end(),
- VariableDebugSorter());
- return VariableDbgInfo;
-}
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 61334fc1790a1..c8f8fafe227e6 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -26,11 +26,21 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-STATISTIC(NumSunk, "Number of machine instructions sunk");
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+ cl::desc("Split critical edges during machine sinking"),
+ cl::init(false), cl::Hidden);
+static cl::opt<unsigned>
+SplitLimit("split-limit",
+ cl::init(~0u), cl::Hidden);
+
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
namespace {
class MachineSinking : public MachineFunctionPass {
@@ -44,7 +54,7 @@ namespace {
public:
static char ID; // Pass identification
- MachineSinking() : MachineFunctionPass(&ID) {}
+ MachineSinking() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -59,21 +69,28 @@ namespace {
}
private:
bool ProcessBlock(MachineBasicBlock &MBB);
+ MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From,
+ MachineBasicBlock *To);
bool SinkInstruction(MachineInstr *MI, bool &SawStore);
- bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
+ bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB, bool &LocalUse) const;
};
} // end anonymous namespace
char MachineSinking::ID = 0;
-static RegisterPass<MachineSinking>
-X("machine-sink", "Machine code sinking");
+INITIALIZE_PASS(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false);
FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
/// AllUsesDominatedByBlock - Return true if all uses of the specified register
-/// occur in blocks dominated by the specified block.
+/// occur in blocks dominated by the specified block. If any use is in the
+/// definition block, then return false, since it is never legal to move a
+/// def after its uses.
bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
- MachineBasicBlock *MBB) const {
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &LocalUse) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
// Ignoring debug uses is necessary so debug info doesn't affect the code.
@@ -91,6 +108,9 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+ } else if (UseBlock == DefMBB) {
+ LocalUse = true;
+ return false;
}
// Check that it dominates.
@@ -166,6 +186,66 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
return MadeChange;
}
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB) {
+ // Avoid breaking a backedge. From == To means a backedge in a single-BB loop.
+ if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB)
+ return 0;
+
+ // Check for more "complex" loops.
+ if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) ||
+ !LI->isLoopHeader(ToBB)) {
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // path. We need to ensure that the new basic block the computation is
+ // sunk into dominates all of its uses.
+ // It's only legal to break the critical edge and sink the computation to
+ // the new block if every predecessor of "To", other than "From", is
+ // dominated by "To" itself; any path into "To" must then enter it through
+ // the new block first, so the sunk def dominates all the uses.
+ for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+ E = ToBB->pred_end(); PI != E; ++PI) {
+ if (*PI == FromBB)
+ continue;
+ if (!DT->dominates(ToBB, *PI))
+ return 0;
+ }
+
+ // FIXME: Determine if it's cost effective to break this edge.
+ return FromBB->SplitCriticalEdge(ToBB, this);
+ }
+
+ return 0;
+}
+
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
@@ -246,7 +326,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (SuccToSinkTo) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
+ bool LocalUse = false;
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse))
return false;
continue;
@@ -256,10 +337,14 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// we should sink to.
for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
E = ParentBlock->succ_end(); SI != E; ++SI) {
- if (AllUsesDominatedByBlock(Reg, *SI)) {
+ bool LocalUse = false;
+ if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) {
SuccToSinkTo = *SI;
break;
}
+ if (LocalUse)
+ // The def is used locally; it's never safe to move this def.
+ return false;
}
// If we couldn't find a block to sink to, ignore this instruction.
@@ -303,27 +388,44 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (SuccToSinkTo->pred_size() > 1) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
+ bool TryBreak = false;
bool store = true;
if (!MI->isSafeToMove(TII, AA, store)) {
- DEBUG(dbgs() << " *** PUNTING: Wont sink load along critical edge.\n");
- return false;
+ DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+ TryBreak = true;
}
// We don't want to sink across a critical edge if we don't dominate the
// successor. We could be introducing calculations to new code paths.
- if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
- DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
- return false;
+ if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+ TryBreak = true;
}
// Don't sink instructions into a loop.
- if (LI->isLoopHeader(SuccToSinkTo)) {
- DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
- return false;
+ if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+ TryBreak = true;
}
// Otherwise we are OK with sinking along a critical edge.
- DEBUG(dbgs() << "Sinking along critical edge.\n");
+ if (!TryBreak)
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ else {
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo);
+ if (!NewSucc) {
+ DEBUG(dbgs() <<
+ " *** PUNTING: Not legal or profitable to break critical edge\n");
+ return false;
+ } else {
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ }
+ }
}
// Determine where to insert into. Skip phi nodes.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 2297c908b1e00..1e88562935eac 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1,4 +1,4 @@
-//===-- MachineVerifier.cpp - Machine Code Verifier -------------*- C++ -*-===//
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -24,6 +24,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -44,19 +45,14 @@ using namespace llvm;
namespace {
struct MachineVerifier {
- MachineVerifier(Pass *pass, bool allowDoubleDefs) :
+ MachineVerifier(Pass *pass) :
PASS(pass),
- allowVirtDoubleDefs(allowDoubleDefs),
- allowPhysDoubleDefs(true),
OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
{}
bool runOnMachineFunction(MachineFunction &MF);
Pass *const PASS;
- const bool allowVirtDoubleDefs;
- const bool allowPhysDoubleDefs;
-
const char *const OutFileName;
raw_ostream *OS;
const MachineFunction *MF;
@@ -91,10 +87,6 @@ namespace {
// defined. Map value is the user.
RegMap vregsLiveIn;
- // Vregs that must be dead in because they are defined without being
- // killed first. Map value is the defining instruction.
- RegMap vregsDeadIn;
-
// Regs killed in MBB. They may be defined again, and will then be in both
// regsKilled and regsLiveOut.
RegSet regsKilled;
@@ -175,6 +167,7 @@ namespace {
// Analysis information if available
LiveVariables *LiveVars;
+ const LiveIntervals *LiveInts;
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
@@ -195,15 +188,14 @@ namespace {
void calcRegsRequired();
void verifyLiveVariables();
+ void verifyLiveIntervals();
};
struct MachineVerifierPass : public MachineFunctionPass {
static char ID; // Pass ID, replacement for typeid
- bool AllowDoubleDefs;
- explicit MachineVerifierPass(bool allowDoubleDefs = false)
- : MachineFunctionPass(&ID),
- AllowDoubleDefs(allowDoubleDefs) {}
+ MachineVerifierPass()
+ : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -211,7 +203,7 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) {
- MF.verify(this, AllowDoubleDefs);
+ MF.verify(this);
return false;
}
};
@@ -219,17 +211,15 @@ namespace {
}
char MachineVerifierPass::ID = 0;
-static RegisterPass<MachineVerifierPass>
-MachineVer("machineverifier", "Verify generated machine code");
-static const PassInfo *const MachineVerifyID = &MachineVer;
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+ "Verify generated machine code", false, false);
-FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) {
- return new MachineVerifierPass(allowPhysDoubleDefs);
+FunctionPass *llvm::createMachineVerifierPass() {
+ return new MachineVerifierPass();
}
-void MachineFunction::verify(Pass *p, bool allowDoubleDefs) const {
- MachineVerifier(p, allowDoubleDefs)
- .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+void MachineFunction::verify(Pass *p) const {
+ MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this));
}
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
@@ -255,10 +245,13 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
TRI = TM->getRegisterInfo();
MRI = &MF.getRegInfo();
+ LiveVars = NULL;
+ LiveInts = NULL;
if (PASS) {
- LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
- } else {
- LiveVars = NULL;
+ LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+ // We don't want to verify LiveVariables if LiveIntervals is available.
+ if (!LiveInts)
+ LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
}
visitMachineFunctionBefore();
@@ -512,6 +505,20 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
if ((*I)->isStore() && !TI.mayStore())
report("Missing mayStore flag", MI);
}
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(MI);
+ if (MI->isDebugValue()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
}
void
@@ -570,15 +577,30 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
} else
isKill = MO->isKill();
- if (isKill) {
+ if (isKill)
addRegWithSubRegs(regsKilled, Reg);
- // Check that LiveVars knows this kill
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg)) {
- LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
- if (std::find(VI.Kills.begin(),
- VI.Kills.end(), MI) == VI.Kills.end())
- report("Kill missing from LiveVariables", MO, MONum);
+ // Check that LiveVars knows this kill.
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MO->isKill()) {
+ LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+ if (std::find(VI.Kills.begin(),
+ VI.Kills.end(), MI) == VI.Kills.end())
+ report("Kill missing from LiveVariables", MO, MONum);
+ }
+
+ // Check LiveInts liveness and kill.
+ if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (!LI.liveAt(UseIdx)) {
+ report("No live range at use", MO, MONum);
+ *OS << UseIdx << " is not live in " << LI << '\n';
+ }
+ // TODO: Verify isKill == LI.killedAt.
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ report("Virtual register has no Live interval", MO, MONum);
}
}
@@ -607,6 +629,28 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
addRegWithSubRegs(regsDead, Reg);
else
addRegWithSubRegs(regsDefined, Reg);
+
+ // Check LiveInts for a live range, but only for virtual registers.
+ if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ !LiveInts->isNotInMIMap(MI)) {
+ SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
+ if (LiveInts->hasInterval(Reg)) {
+ const LiveInterval &LI = LiveInts->getInterval(Reg);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) {
+ assert(LR->valno && "NULL valno is not allowed");
+ if (LR->valno->def != DefIdx) {
+ report("Inconsistent valno->def", MO, MONum);
+ *OS << "Valno " << LR->valno->id << " is not defined at "
+ << DefIdx << " in " << LI << '\n';
+ }
+ } else {
+ report("No live range at def", MO, MONum);
+ *OS << DefIdx << " is not live in " << LI << '\n';
+ }
+ } else {
+ report("Virtual register has no Live interval", MO, MONum);
+ }
+ }
}
// Check register classes.
@@ -670,40 +714,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
BBInfo &MInfo = MBBInfoMap[MI->getParent()];
set_union(MInfo.regsKilled, regsKilled);
- set_subtract(regsLive, regsKilled);
- regsKilled.clear();
-
- // Verify that both <def> and <def,dead> operands refer to dead registers.
- RegVector defs(regsDefined);
- defs.append(regsDead.begin(), regsDead.end());
-
- for (RegVector::const_iterator I = defs.begin(), E = defs.end();
- I != E; ++I) {
- if (regsLive.count(*I)) {
- if (TargetRegisterInfo::isPhysicalRegister(*I)) {
- if (!allowPhysDoubleDefs && !isReserved(*I) &&
- !regsLiveInButUnused.count(*I)) {
- report("Redefining a live physical register", MI);
- *OS << "Register " << TRI->getName(*I)
- << " was defined but already live.\n";
- }
- } else {
- if (!allowVirtDoubleDefs) {
- report("Redefining a live virtual register", MI);
- *OS << "Virtual register %reg" << *I
- << " was defined but already live.\n";
- }
- }
- } else if (TargetRegisterInfo::isVirtualRegister(*I) &&
- !MInfo.regsKilled.count(*I)) {
- // Virtual register defined without being killed first must be dead on
- // entry.
- MInfo.vregsDeadIn.insert(std::make_pair(*I, MI));
- }
- }
-
- set_subtract(regsLive, regsDead); regsDead.clear();
- set_union(regsLive, regsDefined); regsDefined.clear();
+ set_subtract(regsLive, regsKilled); regsKilled.clear();
+ set_subtract(regsLive, regsDead); regsDead.clear();
+ set_union(regsLive, regsDefined); regsDefined.clear();
}
void
@@ -828,35 +841,15 @@ void MachineVerifier::visitMachineFunctionAfter() {
continue;
checkPHIOps(MFI);
-
- // Verify dead-in virtual registers.
- if (!allowVirtDoubleDefs) {
- for (MachineBasicBlock::const_pred_iterator PrI = MFI->pred_begin(),
- PrE = MFI->pred_end(); PrI != PrE; ++PrI) {
- BBInfo &PrInfo = MBBInfoMap[*PrI];
- if (!PrInfo.reachable)
- continue;
-
- for (RegMap::iterator I = MInfo.vregsDeadIn.begin(),
- E = MInfo.vregsDeadIn.end(); I != E; ++I) {
- // DeadIn register must be in neither regsLiveOut or vregsPassed of
- // any predecessor.
- if (PrInfo.isLiveOut(I->first)) {
- report("Live-in virtual register redefined", I->second);
- *OS << "Register %reg" << I->first
- << " was live-out from predecessor MBB #"
- << (*PrI)->getNumber() << ".\n";
- }
- }
- }
- }
}
- // Now check LiveVariables info if available
- if (LiveVars) {
+ // Now check liveness info if available
+ if (LiveVars || LiveInts)
calcRegsRequired();
+ if (LiveVars)
verifyLiveVariables();
- }
+ if (LiveInts)
+ verifyLiveIntervals();
}
void MachineVerifier::verifyLiveVariables() {
@@ -886,4 +879,55 @@ void MachineVerifier::verifyLiveVariables() {
}
}
+void MachineVerifier::verifyLiveIntervals() {
+ assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+ for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
+ LVE = LiveInts->end(); LVI != LVE; ++LVI) {
+ const LiveInterval &LI = *LVI->second;
+ assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
+
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+ I!=E; ++I) {
+ VNInfo *VNI = *I;
+ const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def);
+
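+ // Each value number should be live at its own definition point unless it
+ // has been marked unused.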
+ if (!DefLR) {
+ if (!VNI->isUnused()) {
+ report("Valno not live at def and not marked unused", MF);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+ continue;
+ }
+
+ if (VNI->isUnused())
+ continue;
+
+ if (DefLR->valno != VNI) {
+ report("Live range at def has different valno", MF);
+ DefLR->print(*OS);
+ *OS << " should use valno #" << VNI->id << " in " << LI << '\n';
+ }
+
+ }
+
+ for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
+ const LiveRange &LR = *I;
+ assert(LR.valno && "Live range has no valno");
+
+ if (LR.valno->id >= LI.getNumValNums() ||
+ LR.valno != LI.getValNumInfo(LR.valno->id)) {
+ report("Foreign valno in live range", MF);
+ LR.print(*OS);
+ *OS << " has a valno not in " << LI << '\n';
+ }
+
+ if (LR.valno->isUnused()) {
+ report("Live range valno is marked unused", MF);
+ LR.print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ }
+ }
+}
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
deleted file mode 100644
index dcdc243e5db34..0000000000000
--- a/lib/CodeGen/OptimizeExts.cpp
+++ /dev/null
@@ -1,220 +0,0 @@
-//===-- OptimizeExts.cpp - Optimize sign / zero extension instrs -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass performs optimization of sign / zero extension instructions. It
-// may be extended to handle other instructions of similar property.
-//
-// On some targets, some instructions, e.g. X86 sign / zero extension, may
-// leave the source value in the lower part of the result. This pass will
-// replace (some) uses of the pre-extension value with uses of the sub-register
-// of the results.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ext-opt"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-static cl::opt<bool> Aggressive("aggressive-ext-opt", cl::Hidden,
- cl::desc("Aggressive extension optimization"));
-
-STATISTIC(NumReuse, "Number of extension results reused");
-
-namespace {
- class OptimizeExts : public MachineFunctionPass {
- const TargetMachine *TM;
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
- MachineDominatorTree *DT; // Machine dominator tree
-
- public:
- static char ID; // Pass identification
- OptimizeExts() : MachineFunctionPass(&ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- if (Aggressive) {
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- }
- }
-
- private:
- bool OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &LocalMIs);
- };
-}
-
-char OptimizeExts::ID = 0;
-static RegisterPass<OptimizeExts>
-X("opt-exts", "Optimize sign / zero extensions");
-
-FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
-
-/// OptimizeInstr - If instruction is a copy-like instruction, i.e. it reads
-/// a single register and writes a single register and it does not modify
-/// the source, and if the source value is preserved as a sub-register of
-/// the result, then replace all reachable uses of the source with the subreg
-/// of the result.
-/// Do not generate an EXTRACT that is used only in a debug use, as this
-/// changes the code. Since this code does not currently share EXTRACTs, just
-/// ignore all debug uses.
-bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
- SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
- bool Changed = false;
- LocalMIs.insert(MI);
-
- unsigned SrcReg, DstReg, SubIdx;
- if (TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) {
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg))
- return false;
-
- MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
- if (++UI == MRI->use_nodbg_end())
- // No other uses.
- return false;
-
- // Ok, the source has other uses. See if we can replace the other uses
- // with use of the result of the extension.
- SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
- UI = MRI->use_nodbg_begin(DstReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
- UI != UE; ++UI)
- ReachedBBs.insert(UI->getParent());
-
- bool ExtendLife = true;
- // Uses that are in the same BB of uses of the result of the instruction.
- SmallVector<MachineOperand*, 8> Uses;
- // Uses that the result of the instruction can reach.
- SmallVector<MachineOperand*, 8> ExtendedUses;
-
- UI = MRI->use_nodbg_begin(SrcReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
- UI != UE; ++UI) {
- MachineOperand &UseMO = UI.getOperand();
- MachineInstr *UseMI = &*UI;
- if (UseMI == MI)
- continue;
- if (UseMI->isPHI()) {
- ExtendLife = false;
- continue;
- }
-
- // It's an error to translate this:
- //
- // %reg1025 = <sext> %reg1024
- // ...
- // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
- //
- // into this:
- //
- // %reg1025 = <sext> %reg1024
- // ...
- // %reg1027 = COPY %reg1025:4
- // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
- //
- // The problem here is that SUBREG_TO_REG is there to assert that an
- // implicit zext occurs. It doesn't insert a zext instruction. If we allow
- // the COPY here, it will give us the value after the <sext>,
- // not the original value of %reg1024 before <sext>.
- if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
- continue;
-
- MachineBasicBlock *UseMBB = UseMI->getParent();
- if (UseMBB == MBB) {
- // Local uses that come after the extension.
- if (!LocalMIs.count(UseMI))
- Uses.push_back(&UseMO);
- } else if (ReachedBBs.count(UseMBB))
- // Non-local uses where the result of extension is used. Always
- // replace these unless it's a PHI.
- Uses.push_back(&UseMO);
- else if (Aggressive && DT->dominates(MBB, UseMBB))
- // We may want to extend live range of the extension result in order
- // to replace these uses.
- ExtendedUses.push_back(&UseMO);
- else {
- // Both will be live out of the def MBB anyway. Don't extend live
- // range of the extension result.
- ExtendLife = false;
- break;
- }
- }
-
- if (ExtendLife && !ExtendedUses.empty())
- // Ok, we'll extend the liveness of the extension result.
- std::copy(ExtendedUses.begin(), ExtendedUses.end(),
- std::back_inserter(Uses));
-
- // Now replace all uses.
- if (!Uses.empty()) {
- SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
- // Look for PHI uses of the extended result, we don't want to extend the
- // liveness of a PHI input. It breaks all kinds of assumptions down
- // stream. A PHI use is expected to be the kill of its source values.
- UI = MRI->use_nodbg_begin(DstReg);
- for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
- UI != UE; ++UI)
- if (UI->isPHI())
- PHIBBs.insert(UI->getParent());
-
- const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
- for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
- MachineOperand *UseMO = Uses[i];
- MachineInstr *UseMI = UseMO->getParent();
- MachineBasicBlock *UseMBB = UseMI->getParent();
- if (PHIBBs.count(UseMBB))
- continue;
- unsigned NewVR = MRI->createVirtualRegister(RC);
- BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVR)
- .addReg(DstReg, 0, SubIdx);
- UseMO->setReg(NewVR);
- ++NumReuse;
- Changed = true;
- }
- }
- }
-
- return Changed;
-}
-
-bool OptimizeExts::runOnMachineFunction(MachineFunction &MF) {
- TM = &MF.getTarget();
- TII = TM->getInstrInfo();
- MRI = &MF.getRegInfo();
- DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
-
- bool Changed = false;
-
- SmallPtrSet<MachineInstr*, 8> LocalMIs;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock *MBB = &*I;
- LocalMIs.clear();
- for (MachineBasicBlock::iterator MII = I->begin(), ME = I->end(); MII != ME;
- ++MII) {
- MachineInstr *MI = &*MII;
- Changed |= OptimizeInstr(MI, MBB, LocalMIs);
- }
- }
-
- return Changed;
-}
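The use-classification loop in OptimizeInstr above sorts the pre-extension value's uses into three buckets: local uses after the extension, uses in blocks the extension result already reaches, and (in aggressive mode) uses in blocks dominated by the defining block. A minimal standalone sketch of that decision, with string block names and a dominance callback standing in for MachineBasicBlock and MachineDominatorTree (nothing here is LLVM API):

    #include <functional>
    #include <set>
    #include <string>
    #include <vector>

    // One use of the pre-extension value in a hypothetical CFG.
    struct Use {
      std::string Block;  // block containing the use
      bool IsPHI;         // PHI inputs must not have their range extended
      bool BeforeExt;     // local use already seen before the extension
    };

    // Mirrors the Uses/ExtendedUses split above. Returns the uses that may
    // be rewritten to the sub-register of the extension result; drops the
    // "extended" group when some use keeps the source live out anyway.
    std::vector<Use> classifyUses(const std::vector<Use> &SrcUses,
                                  const std::string &DefBlock,
                                  const std::set<std::string> &ReachedBlocks,
                                  bool Aggressive,
                                  const std::function<bool(const std::string &,
                                                           const std::string &)>
                                      &Dominates) {
      std::vector<Use> Rewritable, Extended;
      bool ExtendLife = true;
      for (const Use &U : SrcUses) {
        if (U.IsPHI) { ExtendLife = false; continue; }
        if (U.Block == DefBlock) {
          if (!U.BeforeExt) Rewritable.push_back(U); // local, after the ext
        } else if (ReachedBlocks.count(U.Block)) {
          Rewritable.push_back(U);       // ext result is already live here
        } else if (Aggressive && Dominates(DefBlock, U.Block)) {
          Extended.push_back(U);         // worth extending the result's range
        } else {
          ExtendLife = false;            // both live out anyway: give up
          break;
        }
      }
      if (ExtendLife)
        Rewritable.insert(Rewritable.end(), Extended.begin(), Extended.end());
      return Rewritable;
    }

Note that the PHI case only disables extending the live range; uses the extension result already reaches can still be rewritten, matching the loop above.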
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 1613fe21e42dd..edb4eea71b8a7 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -33,7 +33,7 @@ namespace {
public:
static char ID; // Pass identification
- OptimizePHIs() : MachineFunctionPass(&ID) {}
+ OptimizePHIs() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -54,8 +54,8 @@ namespace {
}
char OptimizePHIs::ID = 0;
-static RegisterPass<OptimizePHIs>
-X("opt-phis", "Optimize machine instruction PHIs");
+INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+ "Optimize machine instruction PHIs", false, false);
FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
@@ -101,16 +101,10 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
// Skip over register-to-register moves.
- unsigned MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx;
- if (SrcMI &&
- TII->isMoveInstr(*SrcMI, MvSrcReg, MvDstReg, SrcSubIdx, DstSubIdx) &&
- SrcSubIdx == 0 && DstSubIdx == 0 &&
- TargetRegisterInfo::isVirtualRegister(MvSrcReg))
- SrcMI = MRI->getVRegDef(MvSrcReg);
- else if (SrcMI && SrcMI->isCopy() &&
- !SrcMI->getOperand(0).getSubReg() &&
- !SrcMI->getOperand(1).getSubReg() &&
- TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ if (SrcMI && SrcMI->isCopy() &&
+ !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
if (!SrcMI)
return false;
diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h
index 3bb24e1cc370c..791c227f0d07e 100644
--- a/lib/CodeGen/PBQP/HeuristicBase.h
+++ b/lib/CodeGen/PBQP/HeuristicBase.h
@@ -173,9 +173,13 @@ namespace PBQP {
bool finished = false;
while (!finished) {
- if (!optimalReduce())
- if (!impl().heuristicReduce())
+ if (!optimalReduce()) {
+ if (impl().heuristicReduce()) {
+ getSolver().recordRN();
+ } else {
finished = true;
+ }
+ }
}
}
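The restructured loop above interleaves the two reduction strategies: optimal rules run until they stall, only then is the heuristic asked to pick a node, and each heuristic application is now recorded. A minimal model of that driver, using plain callables as stand-ins for the solver and heuristic (an illustration, not the PBQP classes themselves):

    struct Counters { unsigned RN = 0; };

    template <typename OptimalFn, typename HeuristicFn>
    void reduce(OptimalFn optimalReduce, HeuristicFn heuristicReduce,
                Counters &C) {
      bool finished = false;
      while (!finished) {
        if (!optimalReduce()) {    // no R0/R1/R2 rule applied this round
          if (heuristicReduce())   // heuristic managed to push a node
            ++C.RN;                // mirrors getSolver().recordRN()
          else
            finished = true;       // graph fully reduced
        }
      }
    }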
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index 02938df007004..35514f9674785 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -226,6 +226,8 @@ namespace PBQP {
// Nothing to do. Just push the node onto the reduction stack.
pushToStack(nItr);
+
+ s.recordR0();
}
/// \brief Apply rule R1.
@@ -274,6 +276,7 @@ namespace PBQP {
assert(nd.getSolverDegree() == 0 &&
"Degree 1 with edge removed should be 0.");
pushToStack(xnItr);
+ s.recordR1();
}
/// \brief Apply rule R2.
@@ -378,8 +381,14 @@ namespace PBQP {
removeSolverEdge(zxeItr);
pushToStack(xnItr);
+ s.recordR2();
}
+ /// \brief Record an application of the RN rule.
+ ///
+ /// For use by the HeuristicBase.
+ void recordRN() { s.recordRN(); }
+
private:
NodeData& getSolverNodeData(Graph::NodeItr nItr) {
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index 4c1ce119ed050..18eaf7c0da9b3 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -52,9 +52,7 @@ namespace PBQP {
bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr))
return true;
- if (s->getSolverDegree(n1Itr) < s->getSolverDegree(n2Itr))
- return false;
- return (&*n1Itr < &*n2Itr);
+ return false;
}
private:
HeuristicSolverImpl<Briggs> *s;
@@ -69,9 +67,7 @@ namespace PBQP {
cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr);
if (cost1 < cost2)
return true;
- if (cost1 > cost2)
- return false;
- return (&*n1Itr < &*n2Itr);
+ return false;
}
private:
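The trimmed comparators above now order nodes by a single key and treat ties as equivalent. Returning false for equal keys still yields a valid strict weak ordering, which is all sorted containers require; the removed address comparison only made the ordering fully deterministic. A toy illustration with plain unsigneds standing in for getSolverDegree():

    #include <algorithm>
    #include <vector>

    // Order by solver degree only; equal degrees compare false both ways,
    // so the pair is equivalent under the strict weak ordering.
    struct ByDegreeDescending {
      bool operator()(unsigned D1, unsigned D2) const { return D1 > D2; }
    };

    int main() {
      std::vector<unsigned> Degrees = {3, 1, 3, 2};
      std::sort(Degrees.begin(), Degrees.end(), ByDegreeDescending());
      return 0; // Degrees is now {3, 3, 2, 1}; the two 3s in either order.
    }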
diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h
index 294b5370afdfe..047fd04c7cb8f 100644
--- a/lib/CodeGen/PBQP/Solution.h
+++ b/lib/CodeGen/PBQP/Solution.h
@@ -26,15 +26,46 @@ namespace PBQP {
/// To get the selection for each node in the problem, use the getSelection method.
class Solution {
private:
+
typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap;
SelectionsMap selections;
+ unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
+
public:
/// \brief Number of nodes for which selections have been made.
/// @return Number of nodes for which selections have been made.
unsigned numNodes() const { return selections.size(); }
+ /// \brief Records a reduction via the R0 rule. Should be called from the
+ /// solver only.
+ void recordR0() { ++r0Reductions; }
+
+ /// \brief Returns the number of R0 reductions applied to solve the problem.
+ unsigned numR0Reductions() const { return r0Reductions; }
+
+ /// \brief Records a reduction via the R1 rule. Should be called from the
+ /// solver only.
+ void recordR1() { ++r1Reductions; }
+
+ /// \brief Returns the number of R1 reductions applied to solve the problem.
+ unsigned numR1Reductions() const { return r1Reductions; }
+
+ /// \brief Records a reduction via the R2 rule. Should be called from the
+ /// solver only.
+ void recordR2() { ++r2Reductions; }
+
+ /// \brief Returns the number of R2 reductions applied to solve the problem.
+ unsigned numR2Reductions() const { return r2Reductions; }
+
+ /// \brief Records a reduction via the RN rule. Should be called from the
+ /// solver only.
+ void recordRN() { ++rNReductions; }
+
+ /// \brief Returns the number of RN reductions applied to solve the problem.
+ unsigned numRNReductions() const { return rNReductions; }
+
/// \brief Set the selection for a given node.
/// @param nItr Node iterator.
/// @param selection Selection for nItr.
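The four counters above are only ever incremented, and the hunk shown here does not add an initializer for them. As a sketch, a constructor that zero-initializes the counters would look like the following; the constructor is an assumption, only the member names come from the diff:

    // Member names from the diff; the constructor is the assumed addition.
    class Solution {
      unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
    public:
      Solution()
        : r0Reductions(0), r1Reductions(0),
          r2Reductions(0), rNReductions(0) {}
      void recordR0() { ++r0Reductions; }
      unsigned numR0Reductions() const { return r0Reductions; }
      // recordR1/R2/RN and their accessors follow the same pattern.
    };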
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index ea6b094d7efe4..d4df4c548711e 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Function.h"
@@ -37,16 +38,15 @@ STATISTIC(NumAtomic, "Number of atomic phis lowered");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
-static RegisterPass<PHIElimination>
-X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
+INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
+ "Eliminate PHI nodes for register allocation", false, false);
-const PassInfo *const llvm::PHIEliminationID = &X;
+char &llvm::PHIEliminationID = PHIElimination::ID;
void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
- // rdar://7401784 This would be nice:
- // AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -56,9 +56,11 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
// Split critical edges to help the coalescer
- if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>())
+ if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
+ MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- Changed |= SplitPHIEdges(MF, *I, *LV);
+ Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
+ }
// Populate VRegPHIUseCount
analyzePHINodes(MF);
@@ -179,6 +181,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
unsigned DestReg = MPhi->getOperand(0).getReg();
+ assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
bool isDead = MPhi->getOperand(0).isDead();
// Create a new register for the incoming PHI arguments.
@@ -265,6 +268,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
for (int i = NumSrcs - 1; i >= 0; --i) {
unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+
assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
@@ -294,7 +299,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Insert the copy.
if (!reusedIncoming && IncomingReg)
BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg);
+ TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg, 0, SrcSubReg);
// Now update live variable information if we have it. Otherwise we're done
if (!LV) continue;
@@ -378,10 +383,12 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
MachineBasicBlock &MBB,
- LiveVariables &LV) {
+ LiveVariables &LV,
+ MachineLoopInfo *MLI) {
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
+ bool Changed = false;
for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
@@ -390,8 +397,15 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
// We break edges when registers are live out from the predecessor block
// (not considering PHI nodes). If the register is live in to this block
// anyway, we would gain nothing from splitting.
- if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB))
- PreMBB->SplitCriticalEdge(&MBB, this);
+ // Avoid splitting backedges of loops. It would introduce small
+ // out-of-line blocks into the loop, which is very bad for code placement.
+ if (PreMBB != &MBB &&
+ !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
+ if (!MLI ||
+ !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
+ MLI->isLoopHeader(&MBB)))
+ Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0;
+ }
}
}
return true;
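The new condition in SplitPHIEdges declines to split an edge when it is a loop backedge: both endpoints map to the same loop and the successor is that loop's header. A compact standalone model of the test, with a hypothetical MiniLoopInfo in place of MachineLoopInfo:

    #include <map>
    #include <string>

    // Hypothetical loop oracle: block -> loop id (0 = not in a loop), plus
    // the set of loop-header blocks. Stands in for MachineLoopInfo.
    struct MiniLoopInfo {
      std::map<std::string, int> LoopId;
      std::map<std::string, bool> Header;
      int loopOf(const std::string &B) const {
        std::map<std::string, int>::const_iterator I = LoopId.find(B);
        return I == LoopId.end() ? 0 : I->second;
      }
      bool isHeader(const std::string &B) const {
        std::map<std::string, bool>::const_iterator I = Header.find(B);
        return I != Header.end() && I->second;
      }
    };

    // Split a PHI-feeding edge only when the value is live out of Pred but
    // not live into Succ, and the edge is not a loop backedge.
    bool shouldSplit(const MiniLoopInfo *MLI, const std::string &Pred,
                     const std::string &Succ, bool LiveInSucc,
                     bool LiveOutPred) {
      if (Pred == Succ || LiveInSucc || !LiveOutPred)
        return false;
      if (MLI && MLI->loopOf(Pred) == MLI->loopOf(Succ) &&
          MLI->isHeader(Succ))
        return false; // backedge: splitting hurts code placement
      return true;
    }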
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 7dedf0318a8a6..45a97182e71c5 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -13,19 +13,21 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
class LiveVariables;
+ class MachineRegisterInfo;
+ class MachineLoopInfo;
/// Lower PHI instructions to copies.
class PHIElimination : public MachineFunctionPass {
- MachineRegisterInfo *MRI; // Machine register information
+ MachineRegisterInfo *MRI; // Machine register information
public:
static char ID; // Pass identification, replacement for typeid
- PHIElimination() : MachineFunctionPass(&ID) {}
+ PHIElimination() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -49,7 +51,7 @@ namespace llvm {
/// Split critical edges where necessary for good coalescer performance.
bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
- LiveVariables &LV);
+ LiveVariables &LV, MachineLoopInfo *MLI);
/// SplitCriticalEdge - Split a critical edge from A to B by
/// inserting a new MBB. Update branches in A and PHI instructions
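The header change above swaps a heavyweight include for forward declarations, which suffices because the interface only mentions the types by pointer or reference. The same pattern in generic form (hypothetical names, not the LLVM types):

    #include "small_base.h" // full definition needed: used as a base class

    class Gadget;           // forward declaration: only Gadget*/Gadget& below

    class Widget : public SmallBase {
      Gadget *G;            // pointer member: Gadget's size is not needed
    public:
      void attach(Gadget &NewG); // reference parameter: declaration suffices
    };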
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 0000000000000..17cee46ca16c6
--- /dev/null
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,287 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+// Optimization of sign / zero extension instructions. It may be extended to
+// handle other instructions with similar properties.
+//
+// On some targets, some instructions, e.g. X86 sign / zero extension, may
+// leave the source value in the lower part of the result. This optimization
+// will replace some uses of the pre-extension value with uses of the
+// sub-register of the results.
+//
+// - Optimize Comparisons
+//
+// Optimization of comparison instructions. For instance, in this code:
+//
+// sub r1, 1
+// cmp r1, 0
+// bz L1
+//
+// If the "sub" instruction all ready sets (or could be modified to set) the
+// same flag that the "cmp" instruction sets and that "bz" uses, then we can
+// eliminate the "cmp" instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "peephole-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+ cl::desc("Aggressive extension optimization"));
+
+STATISTIC(NumReuse, "Number of extension results reused");
+STATISTIC(NumEliminated, "Number of compares eliminated");
+
+namespace {
+ class PeepholeOptimizer : public MachineFunctionPass {
+ const TargetMachine *TM;
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT; // Machine dominator tree
+
+ public:
+ static char ID; // Pass identification
+ PeepholeOptimizer() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ if (Aggressive) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+ }
+
+ private:
+ bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ };
+}
+
+char PeepholeOptimizer::ID = 0;
+INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false);
+
+FunctionPass *llvm::createPeepholeOptimizerPass() {
+ return new PeepholeOptimizer();
+}
+
+/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
+/// a single register and writes a single register and it does not modify the
+/// source, and if the source value is preserved as a sub-register of the
+/// result, then replace all reachable uses of the source with the subreg of the
+/// result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this changes
+/// the code. Since this code does not currently share EXTRACTs, just ignore all
+/// debug uses.
+bool PeepholeOptimizer::
+OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+ LocalMIs.insert(MI);
+
+ unsigned SrcReg, DstReg, SubIdx;
+ if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+ return false;
+
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ return false;
+
+ MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
+ if (++UI == MRI->use_nodbg_end())
+ // No other uses.
+ return false;
+
+ // The source has other uses. See if we can replace the other uses with use of
+ // the result of the extension.
+ SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
+ ReachedBBs.insert(UI->getParent());
+
+ // Uses that are in the same BB as uses of the result of the instruction.
+ SmallVector<MachineOperand*, 8> Uses;
+
+ // Uses that the result of the instruction can reach.
+ SmallVector<MachineOperand*, 8> ExtendedUses;
+
+ bool ExtendLife = true;
+ UI = MRI->use_nodbg_begin(SrcReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ if (UseMI == MI)
+ continue;
+
+ if (UseMI->isPHI()) {
+ ExtendLife = false;
+ continue;
+ }
+
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>, not the
+ // original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (UseMBB == MBB) {
+ // Local uses that come after the extension.
+ if (!LocalMIs.count(UseMI))
+ Uses.push_back(&UseMO);
+ } else if (ReachedBBs.count(UseMBB)) {
+ // Non-local uses where the result of the extension is used. Always
+ // replace these unless it's a PHI.
+ Uses.push_back(&UseMO);
+ } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+ // We may want to extend the live range of the extension result in order
+ // to replace these uses.
+ ExtendedUses.push_back(&UseMO);
+ } else {
+ // Both will be live out of the def MBB anyway. Don't extend live range of
+ // the extension result.
+ ExtendLife = false;
+ break;
+ }
+ }
+
+ if (ExtendLife && !ExtendedUses.empty())
+ // Extend the liveness of the extension result.
+ std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+ std::back_inserter(Uses));
+
+ // Now replace all uses.
+ bool Changed = false;
+ if (!Uses.empty()) {
+ SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+ // Look for PHI uses of the extended result, we don't want to extend the
+ // liveness of a PHI input. It breaks all kinds of assumptions down
+ // stream. A PHI use is expected to be the kill of its source values.
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI)
+ if (UI->isPHI())
+ PHIBBs.insert(UI->getParent());
+
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ MachineOperand *UseMO = Uses[i];
+ MachineInstr *UseMI = UseMO->getParent();
+ MachineBasicBlock *UseMBB = UseMI->getParent();
+ if (PHIBBs.count(UseMBB))
+ continue;
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
+
+ UseMO->setReg(NewVR);
+ ++NumReuse;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
+/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against all ready sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and use
+/// the flag from the previous instruction.
+bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ // If this instruction is a comparison against zero and isn't comparing a
+ // physical register, we can try to optimize it.
+ unsigned SrcReg;
+ int CmpValue;
+ if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0)
+ return false;
+
+ MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
+ if (llvm::next(DI) != MRI->def_end())
+ // Only support one definition.
+ return false;
+
+ // Attempt to convert the defining instruction to set the "zero" flag.
+ if (TII->ConvertToSetZeroFlag(&*DI, MI)) {
+ ++NumEliminated;
+ return true;
+ }
+
+ return false;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+ bool Changed = false;
+
+ SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = &*I;
+ LocalMIs.clear();
+
+ for (MachineBasicBlock::iterator
+ MII = I->begin(), ME = I->end(); MII != ME; ) {
+ MachineInstr *MI = &*MII;
+
+ if (MI->getDesc().isCompare() &&
+ !MI->getDesc().hasUnmodeledSideEffects()) {
+ ++MII; // The iterator may become invalid if the compare is deleted.
+ Changed |= OptimizeCmpInstr(MI, MBB);
+ } else {
+ Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+ ++MII;
+ }
+ }
+ }
+
+ return Changed;
+}
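The compare elimination described in the new file's header hinges on the defining arithmetic already producing the flag the branch consumes. A self-contained toy model of the before/after shape (ordinary C++ standing in for machine instructions; not LLVM API):

    #include <cstdio>

    struct Flags { bool Zero; };

    // Plays the role of "sub r1, 1": the zero flag falls out of the subtract.
    int subAndSetFlags(int A, int B, Flags &F) {
      int R = A - B;
      F.Zero = (R == 0);
      return R;
    }

    int main() {
      Flags F;
      int R1 = subAndSetFlags(5, 5, F);
      // Before: sub; cmp r1, 0; bz L1.  After: sub; bz L1 on the sub's flag.
      if (F.Zero)
        std::printf("branch taken (r1 = %d)\n", R1);
      return 0;
    }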
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 4af8e07f34800..f0bd6d1372be7 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -85,7 +85,7 @@ namespace {
public:
static char ID;
PostRAScheduler(CodeGenOpt::Level ol) :
- MachineFunctionPass(&ID), OptLevel(ol) {}
+ MachineFunctionPass(ID), OptLevel(ol) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -130,7 +130,7 @@ namespace {
/// KillIndices - The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
- unsigned KillIndices[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<unsigned> KillIndices;
public:
SchedulePostRATDList(MachineFunction &MF,
@@ -140,7 +140,8 @@ namespace {
AntiDepBreaker *ADB,
AliasAnalysis *aa)
: ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
- HazardRec(HR), AntiDepBreak(ADB), AA(aa) {}
+ HazardRec(HR), AntiDepBreak(ADB), AA(aa),
+ KillIndices(TRI->getNumRegs()) {}
~SchedulePostRATDList() {
}
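Replacing the fixed-size KillIndices array with a vector sized from the target's register count is a small but recurring pattern: per-register tables should be sized at runtime, not by a compile-time constant. A generic sketch, with a plain parameter standing in for TRI->getNumRegs():

    #include <vector>

    // A per-physical-register table sized at construction; ~0u marks a
    // register that is not live, matching the comment in the hunk above.
    struct KillTable {
      std::vector<unsigned> KillIndices;
      explicit KillTable(unsigned NumRegs) : KillIndices(NumRegs, ~0u) {}
    };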
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index fb2f90935551c..cd9d83eeb6846 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -92,7 +92,7 @@ namespace {
public:
static char ID;
PreAllocSplitting()
- : MachineFunctionPass(&ID) {}
+ : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -203,10 +203,11 @@ namespace {
char PreAllocSplitting::ID = 0;
-static RegisterPass<PreAllocSplitting>
-X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting");
+INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting",
+ "Pre-Register Allocation Live Interval Splitting",
+ false, false);
-const PassInfo *const llvm::PreAllocSplittingID = &X;
+char &llvm::PreAllocSplittingID = PreAllocSplitting::ID;
/// findSpillPoint - Find a gap as far away from the given MI that's suitable
/// for spilling the current live interval. The index must be before any
@@ -676,11 +677,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
// If the def is a move, set the copy field.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- if (DstReg == LI->reg)
- NewVN->setCopy(&*DI);
- } else if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
+ if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
NewVN->setCopy(&*DI);
NewVNs[&*DI] = NewVN;
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 2e31908f9fe2f..b8831db1d118a 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -26,8 +26,8 @@
using namespace llvm;
char ProcessImplicitDefs::ID = 0;
-static RegisterPass<ProcessImplicitDefs> X("processimpdefs",
- "Process Implicit Definitions.");
+INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions.", false, false);
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -46,12 +46,6 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
unsigned Reg, unsigned OpIdx,
const TargetInstrInfo *tii_,
SmallSet<unsigned, 8> &ImpDefRegs) {
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg &&
- (DstSubReg == 0 || ImpDefRegs.count(DstReg)))
- return true;
-
switch(OpIdx) {
case 1:
return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
@@ -75,14 +69,6 @@ static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
return true;
return false;
}
-
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
- if (Reg != SrcReg)
- return false;
- if (DstSubReg == 0 || ImpDefRegs.count(DstReg))
- return true;
- }
return false;
}
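Several hunks in this patch, here and in PreAllocSplitting above, drop TII->isMoveInstr in favor of testing for the target-independent COPY instruction directly. The shape of that test is the same everywhere; a sketch with hypothetical mini types in place of MachineInstr and MachineOperand:

    struct MiniOperand { unsigned Reg; unsigned SubReg; };
    struct MiniInstr {
      bool IsCopy;
      MiniOperand Ops[2]; // Ops[0] = destination, Ops[1] = source
    };

    // A full-register virtual-to-virtual move, as the rewritten code tests
    // it: a COPY whose operands carry no sub-register indices.
    bool isPlainVRegCopy(const MiniInstr &MI, unsigned FirstVirtualReg) {
      return MI.IsCopy &&
             MI.Ops[0].SubReg == 0 && MI.Ops[1].SubReg == 0 &&
             MI.Ops[1].Reg >= FirstVirtualReg; // ~isVirtualRegister
    }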
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 3843b2537051c..e2802c1fdf4a7 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -19,6 +19,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "pei"
#include "PrologEpilogInserter.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -32,7 +33,10 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <climits>
@@ -40,8 +44,11 @@ using namespace llvm;
char PEI::ID = 0;
-static RegisterPass<PEI>
-X("prologepilog", "Prologue/Epilogue Insertion");
+INITIALIZE_PASS(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false);
+
+STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
/// createPrologEpilogCodeInserter - This function returns a pass that inserts
/// prolog and epilog code, and eliminates abstract frame references.
@@ -56,7 +63,6 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
- FrameConstantRegMap.clear();
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
// function's frame information. Also eliminates call frame pseudo
@@ -72,10 +78,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
calculateCalleeSavedRegisters(Fn);
// Determine placement of CSR spill/restore code:
- // - with shrink wrapping, place spills and restores to tightly
+ // - With shrink wrapping, place spills and restores to tightly
// enclose regions in the Machine CFG of the function where
- // they are used. Without shrink wrapping
- // - default (no shrink wrapping), place all spills in the
+ // they are used.
+ // - Without shrink wrapping (default), place all spills in the
// entry block, all restores in return blocks.
placeCSRSpillsAndRestores(Fn);
@@ -461,8 +467,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
Offset = (Offset + Align - 1) / Align * Align;
if (StackGrowsDown) {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
} else {
+ DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
MFI->setObjectOffset(FrameIdx, Offset);
Offset += MFI->getObjectSize(FrameIdx);
}
@@ -547,15 +555,66 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
+ // FIXME: Once this is working, then enable flag will change to a target
+ // check for whether the frame is large enough to want to use virtual
+ // frame index registers. Functions which don't want/need this optimization
+ // will continue to use the existing code path.
+ if (MFI->getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI->getLocalFrameMaxAlign();
+
+ // Adjust to alignment boundary.
+ Offset = (Offset + Align - 1) / Align * Align;
+
+ DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
+
+ // Resolve offsets for objects in the local block.
+ for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
+ DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
+ FIOffset << "]\n");
+ MFI->setObjectOffset(Entry.first, FIOffset);
+ }
+ // Allocate the local block
+ Offset += MFI->getLocalFrameSize();
+
+ MaxAlign = std::max(Align, MaxAlign);
+ }
+
// Make sure that the stack protector comes before the local variables on the
// stack.
- if (MFI->getStackProtectorIndex() >= 0)
+ SmallSet<int, 16> LargeStackObjs;
+ if (MFI->getStackProtectorIndex() >= 0) {
AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
Offset, MaxAlign);
+ // Assign large stack objects first.
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
+ if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
+ continue;
+ if (RS && (int)i == RS->getScavengingFrameIndex())
+ continue;
+ if (MFI->isDeadObjectIndex(i))
+ continue;
+ if (MFI->getStackProtectorIndex() == (int)i)
+ continue;
+ if (!MFI->MayNeedStackProtector(i))
+ continue;
+
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
+ LargeStackObjs.insert(i);
+ }
+ }
+
// Then assign frame offsets to stack objects that are not used to spill
// callee saved registers.
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isObjectPreAllocated(i) &&
+ MFI->getUseLocalStackAllocationBlock())
+ continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
if (RS && (int)i == RS->getScavengingFrameIndex())
@@ -564,6 +623,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
continue;
if (MFI->getStackProtectorIndex() == (int)i)
continue;
+ if (LargeStackObjs.count(i))
+ continue;
AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
}
@@ -694,16 +755,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
- TargetRegisterInfo::FrameIndexValue Value;
- unsigned VReg =
- TRI.eliminateFrameIndex(MI, SPAdj, &Value,
+ TRI.eliminateFrameIndex(MI, SPAdj,
FrameIndexVirtualScavenging ? NULL : RS);
- if (VReg) {
- assert (FrameIndexVirtualScavenging &&
- "Not scavenging, but virtual returned from "
- "eliminateFrameIndex()!");
- FrameConstantRegMap[VReg] = FrameConstantEntry(Value, SPAdj);
- }
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -731,38 +784,6 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
}
}
-/// findLastUseReg - find the killing use of the specified register within
-/// the instruciton range. Return the operand number of the kill in Operand.
-static MachineBasicBlock::iterator
-findLastUseReg(MachineBasicBlock::iterator I, MachineBasicBlock::iterator ME,
- unsigned Reg) {
- // Scan forward to find the last use of this virtual register
- for (++I; I != ME; ++I) {
- MachineInstr *MI = I;
- bool isDefInsn = false;
- bool isKillInsn = false;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
- if (MI->getOperand(i).isReg()) {
- unsigned OpReg = MI->getOperand(i).getReg();
- if (OpReg == 0 || !TargetRegisterInfo::isVirtualRegister(OpReg))
- continue;
- assert (OpReg == Reg
- && "overlapping use of scavenged index register!");
- // If this is the killing use, we have a candidate.
- if (MI->getOperand(i).isKill())
- isKillInsn = true;
- else if (MI->getOperand(i).isDef())
- isDefInsn = true;
- }
- if (isKillInsn && !isDefInsn)
- return I;
- }
- // If we hit the end of the basic block, there was no kill of
- // the virtual register, which is wrong.
- assert (0 && "scavenged index register never killed!");
- return ME;
-}
-
/// scavengeFrameVirtualRegs - Replace all frame index virtual registers
/// with physical registers. Use the register scavenger to find an
/// appropriate register to use.
@@ -772,27 +793,14 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
E = Fn.end(); BB != E; ++BB) {
RS->enterBasicBlock(BB);
- // FIXME: The logic flow in this function is still too convoluted.
- // It needs a cleanup refactoring. Do that in preparation for tracking
- // more than one scratch register value and using ranges to find
- // available scratch registers.
- unsigned CurrentVirtReg = 0;
- unsigned CurrentScratchReg = 0;
- bool havePrevValue = false;
- TargetRegisterInfo::FrameIndexValue PrevValue(0,0);
- TargetRegisterInfo::FrameIndexValue Value(0,0);
- MachineInstr *PrevLastUseMI = NULL;
- unsigned PrevLastUseOp = 0;
- bool trackingCurrentValue = false;
+ unsigned VirtReg = 0;
+ unsigned ScratchReg = 0;
int SPAdj = 0;
// The instruction stream may change in the loop, so check BB->end()
// directly.
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
MachineInstr *MI = I;
- bool isDefInsn = false;
- bool isKillInsn = false;
- bool clobbersScratchReg = false;
bool DoIncr = true;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (MI->getOperand(i).isReg()) {
@@ -800,121 +808,30 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
unsigned Reg = MO.getReg();
if (Reg == 0)
continue;
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- // If we have a previous scratch reg, check and see if anything
- // here kills whatever value is in there.
- if (Reg == CurrentScratchReg) {
- if (MO.isUse()) {
- // Two-address operands implicitly kill
- if (MO.isKill() || MI->isRegTiedToDefOperand(i))
- clobbersScratchReg = true;
- } else {
- assert (MO.isDef());
- clobbersScratchReg = true;
- }
- }
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- }
- // If this is a def, remember that this insn defines the value.
- // This lets us properly consider insns which re-use the scratch
- // register, such as r2 = sub r2, #imm, in the middle of the
- // scratch range.
- if (MO.isDef())
- isDefInsn = true;
+
+ ++NumVirtualFrameRegs;
// Have we already allocated a scratch register for this virtual?
- if (Reg != CurrentVirtReg) {
+ if (Reg != VirtReg) {
// When we first encounter a new virtual register, it
// must be a definition.
assert(MI->getOperand(i).isDef() &&
"frame index virtual missing def!");
- // We can't have nested virtual register live ranges because
- // there's only a guarantee of one scavenged register at a time.
- assert (CurrentVirtReg == 0 &&
- "overlapping frame index virtual registers!");
-
- // If the target gave us information about what's in the register,
- // we can use that to re-use scratch regs.
- DenseMap<unsigned, FrameConstantEntry>::iterator Entry =
- FrameConstantRegMap.find(Reg);
- trackingCurrentValue = Entry != FrameConstantRegMap.end();
- if (trackingCurrentValue) {
- SPAdj = (*Entry).second.second;
- Value = (*Entry).second.first;
- } else {
- SPAdj = 0;
- Value.first = 0;
- Value.second = 0;
- }
-
- // If the scratch register from the last allocation is still
- // available, see if the value matches. If it does, just re-use it.
- if (trackingCurrentValue && havePrevValue && PrevValue == Value) {
- // FIXME: This assumes that the instructions in the live range
- // for the virtual register are exclusively for the purpose
- // of populating the value in the register. That's reasonable
- // for these frame index registers, but it's still a very, very
- // strong assumption. rdar://7322732. Better would be to
- // explicitly check each instruction in the range for references
- // to the virtual register. Only delete those insns that
- // touch the virtual register.
-
- // Find the last use of the new virtual register. Remove all
- // instruction between here and there, and update the current
- // instruction to reference the last use insn instead.
- MachineBasicBlock::iterator LastUseMI =
- findLastUseReg(I, BB->end(), Reg);
-
- // Remove all instructions up 'til the last use, since they're
- // just calculating the value we already have.
- BB->erase(I, LastUseMI);
- I = LastUseMI;
-
- // Extend the live range of the scratch register
- PrevLastUseMI->getOperand(PrevLastUseOp).setIsKill(false);
- RS->setUsed(CurrentScratchReg);
- CurrentVirtReg = Reg;
-
- // We deleted the instruction we were scanning the operands of.
- // Jump back to the instruction iterator loop. Don't increment
- // past this instruction since we updated the iterator already.
- DoIncr = false;
- break;
- }
-
// Scavenge a new scratch register
- CurrentVirtReg = Reg;
+ VirtReg = Reg;
const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
- CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);
- PrevValue = Value;
+ ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
+ ++NumScavengedRegs;
}
// replace this reference to the virtual register with the
// scratch register.
- assert (CurrentScratchReg && "Missing scratch register!");
- MI->getOperand(i).setReg(CurrentScratchReg);
+ assert (ScratchReg && "Missing scratch register!");
+ MI->getOperand(i).setReg(ScratchReg);
- if (MI->getOperand(i).isKill()) {
- isKillInsn = true;
- PrevLastUseOp = i;
- PrevLastUseMI = MI;
- }
}
}
- // If this is the last use of the scratch, stop tracking it. The
- // last use will be a kill operand in an instruction that does
- // not also define the scratch register.
- if (isKillInsn && !isDefInsn) {
- CurrentVirtReg = 0;
- havePrevValue = trackingCurrentValue;
- }
- // Similarly, notice if instruction clobbered the value in the
- // register we're tracking for possible later reuse. This is noted
- // above, but enforced here since the value is still live while we
- // process the rest of the operands of the instruction.
- if (clobbersScratchReg) {
- havePrevValue = false;
- CurrentScratchReg = 0;
- }
if (DoIncr) {
RS->forward(I);
++I;
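With the value-tracking machinery removed, scavengeFrameVirtualRegs reduces to: on the first (defining) appearance of each frame-index virtual register, scavenge a scratch register, then rewrite every mention of the virtual to it. A compact model of that loop over instructions represented as plain operand lists (scavenge() is a hypothetical stand-in for RegScavenger::scavengeRegister):

    #include <vector>

    // Hypothetical stand-in for RegScavenger::scavengeRegister.
    unsigned scavenge() { static unsigned Next = 100; return Next++; }

    // Instructions are modeled as plain lists of register operands; the
    // first sight of a frame-index virtual must be its definition.
    void rewriteFrameVirtuals(std::vector<std::vector<unsigned> > &Instrs,
                              unsigned FirstVirtual) {
      unsigned VirtReg = 0, ScratchReg = 0;
      for (unsigned i = 0; i != Instrs.size(); ++i)
        for (unsigned op = 0; op != Instrs[i].size(); ++op) {
          unsigned &Reg = Instrs[i][op];
          if (Reg < FirstVirtual) continue; // physical: leave alone
          if (Reg != VirtReg) {             // new virtual: scavenge for it
            VirtReg = Reg;
            ScratchReg = scavenge();
          }
          Reg = ScratchReg;                 // rewrite use or def alike
        }
    }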
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index aa95773596cfe..d575124a6b3e6 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -36,7 +36,7 @@ namespace llvm {
class PEI : public MachineFunctionPass {
public:
static char ID;
- PEI() : MachineFunctionPass(&ID) {}
+ PEI() : MachineFunctionPass(ID) {}
const char *getPassName() const {
return "Prolog/Epilog Insertion & Frame Finalization";
@@ -99,13 +99,6 @@ namespace llvm {
// TRI->requiresFrameIndexScavenging() for the current function.
bool FrameIndexVirtualScavenging;
- // When using the scavenger post-pass to resolve frame reference
- // materialization registers, maintain a map of the registers to
- // the constant value and SP adjustment associated with it.
- typedef std::pair<TargetRegisterInfo::FrameIndexValue, int>
- FrameConstantEntry;
- DenseMap<unsigned, FrameConstantEntry> FrameConstantRegMap;
-
#ifndef NDEBUG
// Machine function handle.
MachineFunction* MF;
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index f44478e5dd0bf..fc150d55e2265 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -16,6 +16,7 @@
#include "llvm/BasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -46,7 +47,7 @@ namespace {
class RAFast : public MachineFunctionPass {
public:
static char ID;
- RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1),
+ RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
isBulkSpilling(false) {}
private:
const TargetMachine *TM;
@@ -80,6 +81,8 @@ namespace {
// that is currently available in a physical register.
LiveRegMap LiveVirtRegs;
+ DenseMap<unsigned, MachineInstr *> LiveDbgValueMap;
+
// RegState - Track the state of a physical register.
enum RegState {
// A disabled register is not available for allocation, but an alias may
@@ -110,9 +113,9 @@ namespace {
// Allocatable - vector of allocatable physical registers.
BitVector Allocatable;
- // SkippedInstrs - Descriptors of instructions whose clobber list was ignored
- // because all registers were spilled. It is still necessary to mark all the
- // clobbered registers as used by the function.
+ // SkippedInstrs - Descriptors of instructions whose clobber list was
+ // ignored because all registers were spilled. It is still necessary to
+ // mark all the clobbered registers as used by the function.
SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs;
// isBulkSpilling - This flag is set when LiveRegMap will be cleared
@@ -236,8 +239,7 @@ void RAFast::killVirtReg(unsigned VirtReg) {
}
/// spillVirtReg - This method spills the value specified by VirtReg into the
-/// corresponding stack slot if needed. If isKill is set, the register is also
-/// killed.
+/// corresponding stack slot if needed.
void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
@@ -265,6 +267,31 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
++NumStores; // Update statistics
+ // If this register is used by DBG_VALUE then insert new DBG_VALUE to
+ // identify spilled location as the place to find corresponding variable's
+ // value.
+ if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) {
+ const MDNode *MDPtr =
+ DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
+ int64_t Offset = 0;
+ if (DBG->getOperand(1).isImm())
+ Offset = DBG->getOperand(1).getImm();
+ DebugLoc DL;
+ if (MI == MBB->end()) {
+ // If MI is at basic block end then use last instruction's location.
+ MachineBasicBlock::iterator EI = MI;
+ DL = (--EI)->getDebugLoc();
+ }
+ else
+ DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) {
+ MachineBasicBlock *MBB = DBG->getParent();
+ MBB->insert(MI, NewDV);
+ DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
+ LiveDbgValueMap[LRI->first] = NewDV;
+ }
+ }
if (SpillKill)
LR.LastUse = 0; // Don't kill register again
}
@@ -471,7 +498,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
// First try to find a completely free register.
for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
unsigned PhysReg = *I;
- if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg) &&
+ Allocatable.test(PhysReg))
return assignVirtToPhysReg(LRE, PhysReg);
}
@@ -480,6 +508,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
unsigned BestReg = 0, BestCost = spillImpossible;
for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
+ if (!Allocatable.test(*I))
+ continue;
unsigned Cost = calcSpillCost(*I);
// Cost is 0 when all aliases are already disabled.
if (Cost == 0)
@@ -520,12 +550,9 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
// It's a copy, use the destination register as a hint.
if (UseMI.isCopyLike())
Hint = UseMI.getOperand(0).getReg();
- else if (TII->isMoveInstr(UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
- Hint = DstReg;
}
allocVirtReg(MI, *LRI, Hint);
} else if (LR.LastUse) {
@@ -712,7 +739,8 @@ void RAFast::AllocateBasicBlock() {
// Add live-in registers as live.
for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
E = MBB->livein_end(); I != E; ++I)
- definePhysReg(MII, *I, regReserved);
+ if (Allocatable.test(*I))
+ definePhysReg(MII, *I, regReserved);
SmallVector<unsigned, 8> VirtDead;
SmallVector<MachineInstr*, 32> Coalesced;
@@ -756,31 +784,43 @@ void RAFast::AllocateBasicBlock() {
// Debug values are not allowed to change codegen in any way.
if (MI->isDebugValue()) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
- if (LRI != LiveVirtRegs.end())
- setPhysReg(MI, i, LRI->second.PhysReg);
- else {
- int SS = StackSlotForVirtReg[Reg];
- if (SS == -1)
- MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ bool ScanDbgValue = true;
+ while (ScanDbgValue) {
+ ScanDbgValue = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ LiveDbgValueMap[Reg] = MI;
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
+ if (LRI != LiveVirtRegs.end())
+ setPhysReg(MI, i, LRI->second.PhysReg);
else {
- // Modify DBG_VALUE now that the value is in a spill slot.
- uint64_t Offset = MI->getOperand(1).getImm();
- const MDNode *MDPtr =
- MI->getOperand(MI->getNumOperands()-1).getMetadata();
- DebugLoc DL = MI->getDebugLoc();
- if (MachineInstr *NewDV =
- TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
- DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
- MachineBasicBlock *MBB = MI->getParent();
- MBB->insert(MBB->erase(MI), NewDV);
- } else
- MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS == -1)
+ // We can't allocate a physreg for a DebugValue, sorry!
+ MO.setReg(0);
+ else {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ int64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" <<
+ "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ // Scan NewDV operands from the beginning.
+ MI = NewDV;
+ ScanDbgValue = true;
+ break;
+ } else
+ // We can't allocate a physreg for a DebugValue; sorry!
+ MO.setReg(0);
+ }
}
}
}
@@ -789,14 +829,13 @@ void RAFast::AllocateBasicBlock() {
}
// If this is a copy, we may be able to coalesce.
- unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub;
+ unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0;
if (MI->isCopy()) {
CopyDst = MI->getOperand(0).getReg();
CopySrc = MI->getOperand(1).getReg();
CopyDstSub = MI->getOperand(0).getSubReg();
CopySrcSub = MI->getOperand(1).getSubReg();
- } else if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub))
- CopySrc = CopyDst = 0;
+ }
// Track registers used by instruction.
UsedInInstr.reset();
@@ -843,13 +882,18 @@ void RAFast::AllocateBasicBlock() {
// operands. If there are also physical defs, these registers must avoid
// both physical defs and uses, making them more constrained than normal
// operands.
+ // Similarly, if there are multiple defs and tied operands, we must make
+ // sure the same register is allocated to uses and defs.
// We didn't detect inline asm tied operands above, so just make this extra
// pass for all inline asm.
if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
- (hasTiedOps && hasPhysDefs)) {
+ (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) {
handleThroughOperands(MI, VirtDead);
// Don't attempt coalescing when we have funny stuff going on.
CopyDst = 0;
+ // Pretend we have early clobbers so the use operands get marked below.
+ // This is not necessary for the common case of a single tied use.
+ hasEarlyClobbers = true;
}
// Second scan.
@@ -870,14 +914,17 @@ void RAFast::AllocateBasicBlock() {
MRI->addPhysRegsUsed(UsedInInstr);
- // Track registers defined by instruction - early clobbers at this point.
+ // Track registers defined by instruction - early clobbers and tied uses at
+ // this point.
UsedInInstr.reset();
if (hasEarlyClobbers) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef()) continue;
+ if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ // Look for physreg defs and tied uses.
+ if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue;
UsedInInstr.set(Reg);
for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
UsedInInstr.set(*AS);
@@ -887,9 +934,9 @@ void RAFast::AllocateBasicBlock() {
unsigned DefOpEnd = MI->getNumOperands();
if (TID.isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
- // exception is thrown, the landing pad is going to expect to find registers
- // in their spill slots, and 2. we don't have to wade through all the
- // <imp-def> operands on the call instruction.
+ // exception is thrown, the landing pad is going to expect to find
+ // registers in their spill slots, and 2. we don't have to wade through
+ // all the <imp-def> operands on the call instruction.
DefOpEnd = VirtOpEnd;
DEBUG(dbgs() << " Spilling remaining registers before call.\n");
spillAll(MI);
@@ -992,6 +1039,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
SkippedInstrs.clear();
StackSlotForVirtReg.clear();
+ LiveDbgValueMap.clear();
return true;
}
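LiveDbgValueMap above remembers the most recent DBG_VALUE per virtual register so that spilling can re-point the debug record at the stack slot instead of silently losing the variable's location. A toy model of that bookkeeping (std::map and a two-field record standing in for DenseMap and DBG_VALUE):

    #include <map>

    // A debug record locating one variable: either a register or a slot.
    struct DbgValue { unsigned Reg; int Slot; };

    std::map<unsigned, DbgValue *> LiveDbgValueMap;

    void noteDbgValue(unsigned VReg, DbgValue *DV) {
      LiveDbgValueMap[VReg] = DV; // last DBG_VALUE mentioning VReg
    }

    void spillVirtReg(unsigned VReg, int FrameIndex) {
      std::map<unsigned, DbgValue *>::iterator I =
          LiveDbgValueMap.find(VReg);
      if (I != LiveDbgValueMap.end()) {
        I->second->Reg = 0;           // no longer in a register
        I->second->Slot = FrameIndex; // variable now lives in the slot
      }
    }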
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 044672d6d7a5e..5c62354a8872c 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -87,10 +87,10 @@ namespace {
"to skip."),
cl::init(0),
cl::Hidden);
-
+
struct RALinScan : public MachineFunctionPass {
static char ID;
- RALinScan() : MachineFunctionPass(&ID) {
+ RALinScan() : MachineFunctionPass(ID) {
// Initialize the queue to record recently-used registers.
if (NumRecentlyUsedRegs > 0)
RecentRegs.resize(NumRecentlyUsedRegs, 0);
@@ -125,9 +125,10 @@ namespace {
const TargetRegisterInfo* tri_;
const TargetInstrInfo* tii_;
BitVector allocatableRegs_;
+ BitVector reservedRegs_;
LiveIntervals* li_;
LiveStacks* ls_;
- const MachineLoopInfo *loopInfo;
+ MachineLoopInfo *loopInfo;
/// handled_ - Intervals are added to the handled_ set in the order of their
/// start value. This is used for backtracking.
@@ -255,9 +256,9 @@ namespace {
SmallVector<LiveInterval*, 8> &SpillIntervals);
/// attemptTrivialCoalescing - If a simple interval is defined by a copy,
- /// try allocate the definition the same register as the source register
- /// if the register is not defined during live time of the interval. This
- /// eliminate a copy. This is used to coalesce copies which were not
+ /// try to allocate the definition to the same register as the source,
+ /// if the register is not defined during the life time of the interval.
+ /// This eliminates a copy, and is used to coalesce copies which were not
/// coalesced away before allocation either due to dest and src being in
/// different register classes or because the coalescer was overly
/// conservative.
@@ -335,6 +336,17 @@ namespace {
SmallVector<unsigned, 256> &inactiveCounts,
bool SkipDGRegs);
+ /// getFirstNonReservedPhysReg - return the first non-reserved physical
+ /// register in the register class.
+ unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
+ TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_);
+ TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_);
+ while (i != aoe && reservedRegs_.test(*i))
+ ++i;
+ assert(i != aoe && "All registers reserved?!");
+ return *i;
+ }
+
void ComputeRelatedRegClasses();
template <typename ItTy>
@@ -358,8 +370,8 @@ namespace {
char RALinScan::ID = 0;
}
-static RegisterPass<RALinScan>
-X("linearscan-regalloc", "Linear Scan Register Allocator");
+INITIALIZE_PASS(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false);
void RALinScan::ComputeRelatedRegClasses() {
// First pass, add all reg classes to the union, and determine at least one
@@ -371,7 +383,7 @@ void RALinScan::ComputeRelatedRegClasses() {
for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
I != E; ++I) {
HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
-
+
const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
if (PRC) {
// Already processed this register. Just make sure we know that
@@ -382,7 +394,7 @@ void RALinScan::ComputeRelatedRegClasses() {
}
}
}
-
+
// Second pass, now that we know conservatively what register classes each reg
// belongs to, add info about aliases. We don't need to do this for targets
// without register aliases.
@@ -419,20 +431,15 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
unsigned CandReg;
{
MachineInstr *CopyMI;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (vni->def != SlotIndex() && vni->isDefAccurate() &&
- (CopyMI = li_->getInstructionFromIndex(vni->def)) &&
- (CopyMI->isCopy() ||
- tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)))
+ (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
// Defined by a copy, try to extend SrcReg forward
- CandReg = CopyMI->isCopy() ? CopyMI->getOperand(1).getReg() : SrcReg;
+ CandReg = CopyMI->getOperand(1).getReg();
else if (TrivCoalesceEnds &&
- (CopyMI =
- li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
- tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- cur.reg == SrcReg)
+ (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
+ CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg())
// Only used by a copy, try to extend DstReg backwards
- CandReg = DstReg;
+ CandReg = CopyMI->getOperand(0).getReg();
else
return Reg;
}
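
The hunk above swaps the target-specific TargetInstrInfo::isMoveInstr hook for the target-independent MachineInstr::isCopy() predicate, under which operand 0 is the copy's destination and operand 1 its source. A toy sketch of the candidate selection; the Instr struct is hypothetical, standing in for MachineInstr:

    #include <cstdio>

    // Hypothetical stand-in for MachineInstr: a COPY carries its
    // destination in operand 0 and its source in operand 1.
    struct Instr {
      bool copy;
      unsigned ops[2];
    };

    // Defined by a copy: prefer the copy's source register. Ends in a copy
    // of our register: prefer the destination. Otherwise keep what we have.
    unsigned candidate(const Instr *def, const Instr *lastUse, unsigned reg) {
      if (def && def->copy)
        return def->ops[1];                       // extend SrcReg forward
      if (lastUse && lastUse->copy && lastUse->ops[1] == reg)
        return lastUse->ops[0];                   // extend DstReg backwards
      return reg;
    }

    int main() {
      Instr def = {true, {5, 7}};
      std::printf("r%u\n", candidate(&def, 0, 5)); // prints r7
      return 0;
    }
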
@@ -469,6 +476,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
tri_ = tm_->getRegisterInfo();
tii_ = tm_->getInstrInfo();
allocatableRegs_ = tri_->getAllocatableSet(fn);
+ reservedRegs_ = tri_->getReservedRegs(fn);
li_ = &getAnalysis<LiveIntervals>();
ls_ = &getAnalysis<LiveStacks>();
loopInfo = &getAnalysis<MachineLoopInfo>();
@@ -487,9 +495,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
vrm_ = &getAnalysis<VirtRegMap>();
if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
-
- spiller_.reset(createSpiller(mf_, li_, loopInfo, vrm_));
-
+
+ spiller_.reset(createSpiller(*this, *mf_, *vrm_));
+
initIntervalSets();
linearScan();
@@ -543,7 +551,7 @@ void RALinScan::linearScan() {
// linear scan algorithm
DEBUG({
dbgs() << "********** LINEAR SCAN **********\n"
- << "********** Function: "
+ << "********** Function: "
<< mf_->getFunction()->getName() << '\n';
printIntervals("fixed", fixed_.begin(), fixed_.end());
});
@@ -765,7 +773,8 @@ FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
return IP.end();
}
-static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, SlotIndex Point){
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
+ SlotIndex Point){
for (unsigned i = 0, e = V.size(); i != e; ++i) {
RALinScan::IntervalPtr &IP = V[i];
LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
@@ -804,7 +813,7 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
static
float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
MachineRegisterInfo *mri_,
- const MachineLoopInfo *loopInfo) {
+ MachineLoopInfo *loopInfo) {
float Conflicts = 0;
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
E = mri_->reg_end(); I != E; ++I) {
@@ -837,7 +846,7 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur,
dbgs() << tri_->getName(Candidates[i].first) << " ";
dbgs() << "\n";
});
-
+
// Calculate the number of conflicts of each candidate.
for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
unsigned Reg = i->first->reg;
@@ -955,7 +964,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if (cur->empty()) {
unsigned physReg = vrm_->getRegAllocPref(cur->reg);
if (!physReg)
- physReg = *RC->allocation_order_begin(*mf_);
+ physReg = getFirstNonReservedPhysReg(RC);
DEBUG(dbgs() << tri_->getName(physReg) << '\n');
// Note the register is not really in use.
vrm_->assignVirt2Phys(cur->reg, physReg);
@@ -978,27 +987,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if ((vni->def != SlotIndex()) && !vni->isUnused() &&
vni->isDefAccurate()) {
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (CopyMI &&
- tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
- unsigned Reg = 0;
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
- Reg = SrcReg;
- else if (vrm_->isAssignedReg(SrcReg))
- Reg = vrm_->getPhys(SrcReg);
- if (Reg) {
- if (SrcSubReg)
- Reg = tri_->getSubReg(Reg, SrcSubReg);
- if (DstSubReg)
- Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
- if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
- mri_->setRegAllocationHint(cur->reg, 0, Reg);
- }
- } else if (CopyMI && CopyMI->isCopy()) {
- DstReg = CopyMI->getOperand(0).getReg();
- DstSubReg = CopyMI->getOperand(0).getSubReg();
- SrcReg = CopyMI->getOperand(1).getReg();
- SrcSubReg = CopyMI->getOperand(1).getSubReg();
+ if (CopyMI && CopyMI->isCopy()) {
+ unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
+ unsigned SrcReg = CopyMI->getOperand(1).getReg();
+ unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg();
unsigned Reg = 0;
if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
Reg = SrcReg;
@@ -1024,7 +1016,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Can only allocate virtual registers!");
const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
- // If this is not in a related reg class to the register we're allocating,
+ // If this is not in a related reg class to the register we're allocating,
// don't check it.
if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
cur->overlapsFrom(*i->first, i->second-1)) {
@@ -1033,7 +1025,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
}
}
-
+
// Speculatively check to see if we can get a register right now. If not,
// we know we won't be able to by adding more constraints. If so, we can
// check to see if it is valid. Doing an exhaustive search of the fixed_ list
@@ -1048,7 +1040,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
SmallSet<unsigned, 8> RegAliases;
for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
RegAliases.insert(*AS);
-
+
bool ConflictsWithFixed = false;
for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
IntervalPtr &IP = fixed_[i];
@@ -1068,7 +1060,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
}
}
}
-
+
// Okay, the register picked by our speculative getFreePhysReg call turned
// out to be in use. Actually add all of the conflicting fixed registers to
// regUse_ so we can do an accurate query.
@@ -1080,7 +1072,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
LiveInterval *I = IP.first;
const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+ if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
I->endIndex() > StartPosition) {
LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
IP.second = II;
@@ -1099,11 +1091,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
physReg = getFreePhysReg(cur);
}
}
-
+
// Restore the physical register tracker, removing information about the
// future.
restoreRegUses();
-
+
// If we find a free register, we are done: assign this virtual to
// the free physical register and add this interval to the active
// list.
@@ -1118,7 +1110,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
UpgradeRegister(physReg);
if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
// "Downgrade" physReg to try to keep physReg from being allocated until
- // the next reload from the same SS is allocated.
+ // the next reload from the same SS is allocated.
mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
DowngradeRegister(cur, physReg);
}
@@ -1131,7 +1123,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
for (std::vector<std::pair<unsigned, float> >::iterator
I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
updateSpillWeights(SpillWeights, I->first, I->second, RC);
-
+
// for each interval in active, update spill weights.
for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
i != e; ++i) {
@@ -1141,7 +1133,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
reg = vrm_->getPhys(reg);
updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
}
-
+
DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
// Find a register to spill.
@@ -1155,17 +1147,22 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
e = RC->allocation_order_end(*mf_); i != e; ++i) {
unsigned reg = *i;
float regWeight = SpillWeights[reg];
- // Skip recently allocated registers.
+ // Don't even consider reserved regs.
+ if (reservedRegs_.test(reg))
+ continue;
+ // Skip recently allocated registers (reserved ones were rejected above).
if (minWeight > regWeight && !isRecentlyUsed(reg))
Found = true;
RegsWeights.push_back(std::make_pair(reg, regWeight));
}
-
+
// If we didn't find a register that is spillable, try aliases?
if (!Found) {
for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
e = RC->allocation_order_end(*mf_); i != e; ++i) {
unsigned reg = *i;
+ if (reservedRegs_.test(reg))
+ continue;
// No need to worry about whether the alias register size < regsize of RC.
// We are going to spill all registers that alias it anyway.
for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
@@ -1179,7 +1176,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
minWeight = RegsWeights[0].second;
if (minWeight == HUGE_VALF) {
// All registers must have inf weight. Just grab one!
- minReg = BestPhysReg ? BestPhysReg : *RC->allocation_order_begin(*mf_);
+ minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
if (cur->weight == HUGE_VALF ||
li_->getApproximateInstructionCount(*cur) == 0) {
// Spill a physical register around defs and uses.
@@ -1224,8 +1221,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// linearscan.
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
- SmallVector<LiveInterval*, 8> spillIs;
- std::vector<LiveInterval*> added;
+ SmallVector<LiveInterval*, 8> spillIs, added;
spiller_->spill(cur, added, spillIs);
std::sort(added.begin(), added.end(), LISorter());
@@ -1288,27 +1284,33 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// The earliest start of a Spilled interval indicates up to where
// in handled we need to roll back
- assert(!spillIs.empty() && "No spill intervals?");
+ assert(!spillIs.empty() && "No spill intervals?");
SlotIndex earliestStart = spillIs[0]->beginIndex();
-
+
// Spill live intervals of virtual regs mapped to the physical register we
// want to clear (and its aliases). We only spill those that overlap with the
// current interval as the rest do not affect its allocation. We also keep
// track of the earliest start of all spilled live intervals since this will
// mark our rollback point.
- std::vector<LiveInterval*> added;
+ SmallVector<LiveInterval*, 8> added;
while (!spillIs.empty()) {
LiveInterval *sli = spillIs.back();
spillIs.pop_back();
DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
if (sli->beginIndex() < earliestStart)
earliestStart = sli->beginIndex();
-
- spiller_->spill(sli, added, spillIs, &earliestStart);
+ spiller_->spill(sli, added, spillIs);
addStackInterval(sli, ls_, li_, mri_, *vrm_);
spilled.insert(sli->reg);
}
+ // Include any added intervals in earliestStart.
+ for (unsigned i = 0, e = added.size(); i != e; ++i) {
+ SlotIndex SI = added[i]->beginIndex();
+ if (SI < earliestStart)
+ earliestStart = SI;
+ }
+
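
The added loop folds the intervals created by spilling into the rollback point; it is a running minimum over begin indices. In isolation, with plain unsigneds standing in for SlotIndex:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      unsigned earliestStart = 20;  // earliest start among spilled intervals
      std::vector<unsigned> added;  // begin indices of the added intervals
      added.push_back(40); added.push_back(12); added.push_back(31);
      for (size_t i = 0; i != added.size(); ++i)
        earliestStart = std::min(earliestStart, added[i]);
      std::printf("rolling back to: %u\n", earliestStart); // prints 12
      return 0;
    }
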
DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
// Scan handled in reverse order up to the earliest start of a
@@ -1431,6 +1433,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
// Ignore "downgraded" registers.
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
// Skip recently allocated registers.
if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
FreeReg = Reg;
@@ -1459,6 +1464,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
// Ignore "downgraded" registers.
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
+ // Skip reserved registers.
+ if (reservedRegs_.test(Reg))
+ continue;
if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
FreeReg = Reg;
@@ -1479,17 +1487,17 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
SmallVector<unsigned, 256> inactiveCounts;
unsigned MaxInactiveCount = 0;
-
+
const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
-
+
for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
i != e; ++i) {
unsigned reg = i->first->reg;
assert(TargetRegisterInfo::isVirtualRegister(reg) &&
"Can only allocate virtual registers!");
- // If this is not in a related reg class to the register we're allocating,
+ // If this is not in a related reg class to the register we're allocating,
// don't check it.
const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
@@ -1506,7 +1514,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
unsigned Preference = vrm_->getRegAllocPref(cur->reg);
if (Preference) {
DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
- if (isRegAvail(Preference) &&
+ if (isRegAvail(Preference) &&
RC->contains(Preference))
return Preference;
}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 7e61a12a7eea5..61f337bab49c4 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -34,6 +34,8 @@
#include "PBQP/HeuristicSolver.h"
#include "PBQP/Graph.h"
#include "PBQP/Heuristics/Briggs.h"
+#include "RenderMachineFunction.h"
+#include "Splitter.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
@@ -65,6 +67,11 @@ pbqpCoalescing("pbqp-coalescing",
cl::desc("Attempt coalescing during PBQP register allocation."),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+pbqpPreSplitting("pbqp-pre-splitting",
+ cl::desc("Pre-splite before PBQP register allocation."),
+ cl::init(false), cl::Hidden);
+
namespace {
///
@@ -77,7 +84,7 @@ namespace {
static char ID;
/// Construct a PBQP register allocator.
- PBQPRegAlloc() : MachineFunctionPass(&ID) {}
+ PBQPRegAlloc() : MachineFunctionPass(ID) {}
/// Return the pass name.
virtual const char* getPassName() const {
@@ -96,7 +103,10 @@ namespace {
au.addPreserved<LiveStacks>();
au.addRequired<MachineLoopInfo>();
au.addPreserved<MachineLoopInfo>();
+ if (pbqpPreSplitting)
+ au.addRequired<LoopSplitter>();
au.addRequired<VirtRegMap>();
+ au.addRequired<RenderMachineFunction>();
MachineFunctionPass::getAnalysisUsage(au);
}
@@ -104,7 +114,15 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
private:
- typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+
+ class LIOrdering {
+ public:
+ bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+ return li1->reg < li2->reg;
+ }
+ };
+
+ typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap;
typedef std::vector<const LiveInterval*> Node2LIMap;
typedef std::vector<unsigned> AllowedSet;
typedef std::vector<AllowedSet> AllowedSetMap;
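
The point of LIOrdering is determinism: keying the node map on virtual register number rather than the default pointer comparison makes iteration order, and hence PBQP graph construction, repeatable from run to run. A self-contained illustration with a toy LI struct:

    #include <cstdio>
    #include <map>

    struct LI { unsigned reg; };

    // Compare intervals by register number, not by pointer value, so map
    // iteration does not depend on allocation addresses.
    struct ByReg {
      bool operator()(const LI *a, const LI *b) const {
        return a->reg < b->reg;
      }
    };

    int main() {
      LI x = {2049}, y = {1024};
      std::map<const LI*, unsigned, ByReg> li2Node;
      li2Node[&x] = 0;
      li2Node[&y] = 1;
      std::map<const LI*, unsigned, ByReg>::iterator it = li2Node.begin();
      std::printf("first vreg: %u\n", it->first->reg); // always 1024
      return 0;
    }
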
@@ -112,7 +130,7 @@ namespace {
typedef std::pair<unsigned, unsigned> RegPair;
typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
- typedef std::set<LiveInterval*> LiveIntervalSet;
+ typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet;
typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
@@ -122,6 +140,7 @@ namespace {
const TargetInstrInfo *tii;
const MachineLoopInfo *loopInfo;
MachineRegisterInfo *mri;
+ RenderMachineFunction *rmf;
LiveIntervals *lis;
LiveStacks *lss;
@@ -379,12 +398,14 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
iItr != iEnd; ++iItr) {
const MachineInstr *instr = &*iItr;
- unsigned srcReg, dstReg, srcSubReg, dstSubReg;
// If this isn't a copy then continue to the next instruction.
- if (!tii->isMoveInstr(*instr, srcReg, dstReg, srcSubReg, dstSubReg))
+ if (!instr->isCopy())
continue;
+ unsigned srcReg = instr->getOperand(1).getReg();
+ unsigned dstReg = instr->getOperand(0).getReg();
+
// If the registers are already the same our job is nice and easy.
if (dstReg == srcReg)
continue;
@@ -567,6 +588,8 @@ PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
// Resize allowedSets container appropriately.
allowedSets.resize(vregIntervalsToAlloc.size());
+ BitVector ReservedRegs = tri->getReservedRegs(*mf);
+
// Iterate over virtual register intervals to compute allowed sets...
for (unsigned node = 0; node < node2LI.size(); ++node) {
@@ -575,8 +598,12 @@ PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
// Start by assuming all allocable registers in the class are allowed...
- RegVector liAllowed(liRC->allocation_order_begin(*mf),
- liRC->allocation_order_end(*mf));
+ RegVector liAllowed;
+ TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf);
+ TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf);
+ for (TargetRegisterClass::iterator it = aob; it != aoe; ++it)
+ if (!ReservedRegs.test(*it))
+ liAllowed.push_back(*it);
// Eliminate the physical registers which overlap with this range, along
// with all their aliases.
@@ -735,9 +762,11 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
const LiveInterval *spillInterval = node2LI[node];
double oldSpillWeight = spillInterval->weight;
SmallVector<LiveInterval*, 8> spillIs;
+ rmf->rememberUseDefs(spillInterval);
std::vector<LiveInterval*> newSpills =
lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
addStackInterval(spillInterval, mri);
+ rmf->rememberSpills(spillInterval, newSpills);
(void) oldSpillWeight;
DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
@@ -845,9 +874,11 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
lis = &getAnalysis<LiveIntervals>();
lss = &getAnalysis<LiveStacks>();
loopInfo = &getAnalysis<MachineLoopInfo>();
+ rmf = &getAnalysis<RenderMachineFunction>();
vrm = &getAnalysis<VirtRegMap>();
+
DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n");
// Allocator main loop:
@@ -884,6 +915,8 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
// Finalise allocation, allocate empty ranges.
finalizeAlloc();
+ rmf->renderMachineFunction("After PBQP register allocation.", vrm);
+
vregIntervalsToAlloc.clear();
emptyVRegIntervals.clear();
li2Node.clear();
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index ab0bc2d78a608..02b5539f0f4f0 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -54,9 +54,8 @@ bool CoalescerPair::isMoveInstr(const MachineInstr *MI,
DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm());
Src = MI->getOperand(2).getReg();
SrcSub = MI->getOperand(2).getSubReg();
- } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) {
+ } else
return false;
- }
return true;
}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 43b3fb6426351..a2580b85bcc33 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -21,7 +21,9 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -226,19 +228,14 @@ void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) {
used = ~RegsAvailable & ~ReservedRegs;
}
-/// CreateRegClassMask - Set the bits that represent the registers in the
-/// TargetRegisterClass.
-static void CreateRegClassMask(const TargetRegisterClass *RC, BitVector &Mask) {
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E;
- ++I)
- Mask.set(*I);
-}
-
unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I)
- if (!isAliasUsed(*I))
+ if (!isAliasUsed(*I)) {
+ DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) <<
+ "\n");
return *I;
+ }
return 0;
}
@@ -325,11 +322,9 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj) {
- // Mask off the registers which are not in the TargetRegisterClass.
- BitVector Candidates(NumPhysRegs, false);
- CreateRegClassMask(RC, Candidates);
- // Do not include reserved registers.
- Candidates ^= ReservedRegs & Candidates;
+ // Consider all allocatable registers in the register class initially.
+ BitVector Candidates =
+ TRI->getAllocatableSet(*I->getParent()->getParent(), RC);
// Exclude all the registers being used by the instruction.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
@@ -349,8 +344,10 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
// If we found an unused register there is no reason to spill it.
- if (!isAliasUsed(SReg))
+ if (!isAliasUsed(SReg)) {
+ DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n");
return SReg;
+ }
assert(ScavengedReg == 0 &&
"Scavenger slot is live, unable to scavenge another register!");
@@ -366,12 +363,12 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
"Cannot scavenge register without an emergency spill slot!");
TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
MachineBasicBlock::iterator II = prior(I);
- TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
// Restore the scavenged register before its use (or first terminator).
TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
II = prior(UseMI);
- TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
+ TRI->eliminateFrameIndex(II, SPAdj, this);
}
ScavengeRestore = prior(UseMI);
@@ -380,5 +377,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// ScavengedReg = SReg;
ScavengedRC = RC;
+ DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) <<
+ "\n");
+
return SReg;
}
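
The scavenger change replaces the hand-built class mask (set every register in the class, then XOR out the reserved bits) with TargetRegisterInfo::getAllocatableSet, which already returns the class's registers minus reserved ones; the instruction's own operands are then excluded as before. A bitset sketch of the candidate filtering, with illustrative register numbers:

    #include <bitset>
    #include <cstdio>

    int main() {
      // Allocatable set for the class: members minus reserved registers,
      // as a getAllocatableSet-style query would return.
      std::bitset<16> candidates;
      candidates.set(0); candidates.set(1); candidates.set(2);

      // Exclude registers the instruction itself uses or defines.
      candidates.reset(0);
      candidates.reset(2);

      for (unsigned r = 0; r != candidates.size(); ++r)
        if (candidates.test(r)) {
          std::printf("scavenged r%u\n", r);     // prints r1
          break;
        }
      return 0;
    }
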
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
new file mode 100644
index 0000000000000..93426eecbbc1e
--- /dev/null
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -0,0 +1,1014 @@
+//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "rendermf"
+
+#include "RenderMachineFunction.h"
+
+#include "VirtRegMap.h"
+
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <sstream>
+
+using namespace llvm;
+
+char RenderMachineFunction::ID = 0;
+INITIALIZE_PASS(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false);
+
+static cl::opt<std::string>
+outputFileSuffix("rmf-file-suffix",
+ cl::desc("Appended to function name to get output file name "
+ "(default: \".html\")"),
+ cl::init(".html"), cl::Hidden);
+
+static cl::opt<std::string>
+machineFuncsToRender("rmf-funcs",
+ cl::desc("Coma seperated list of functions to render"
+ ", or \"*\"."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+pressureClasses("rmf-classes",
+ cl::desc("Register classes to render pressure for."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<std::string>
+showIntervals("rmf-intervals",
+ cl::desc("Live intervals to show alongside code."),
+ cl::init(""), cl::Hidden);
+
+static cl::opt<bool>
+filterEmpty("rmf-filter-empty-intervals",
+ cl::desc("Don't display empty intervals."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+showEmptyIndexes("rmf-empty-indexes",
+ cl::desc("Render indexes not associated with instructions or "
+ "MBB starts."),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+useFancyVerticals("rmf-fancy-verts",
+ cl::desc("Use SVG for vertical text."),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+prettyHTML("rmf-pretty-html",
+ cl::desc("Pretty print HTML. For debugging the renderer only.."),
+ cl::init(false), cl::Hidden);
+
+
+namespace llvm {
+
+ bool MFRenderingOptions::renderingOptionsProcessed;
+ std::set<std::string> MFRenderingOptions::mfNamesToRender;
+ bool MFRenderingOptions::renderAllMFs = false;
+
+ std::set<std::string> MFRenderingOptions::classNamesToRender;
+ bool MFRenderingOptions::renderAllClasses = false;
+
+ std::set<std::pair<unsigned, unsigned> >
+ MFRenderingOptions::intervalNumsToRender;
+ unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly;
+
+ template <typename OutputItr>
+ void MFRenderingOptions::splitComaSeperatedList(const std::string &s,
+ OutputItr outItr) {
+ std::string::const_iterator curPos = s.begin();
+ std::string::const_iterator nextComa = std::find(curPos, s.end(), ',');
+ while (nextComa != s.end()) {
+ std::string elem;
+ std::copy(curPos, nextComa, std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ curPos = llvm::next(nextComa);
+ nextComa = std::find(curPos, s.end(), ',');
+ }
+
+ if (curPos != s.end()) {
+ std::string elem;
+ std::copy(curPos, s.end(), std::back_inserter(elem));
+ *outItr = elem;
+ ++outItr;
+ }
+ }
+
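
splitComaSeperatedList walks the string manually with std::find; std::getline on an istringstream yields the same elements (including empties between adjacent commas, and no trailing empty element). A sketch of that equivalent, not a proposed replacement:

    #include <cstdio>
    #include <sstream>
    #include <string>
    #include <vector>

    // Split "main,foo,bar" into {"main", "foo", "bar"} on commas.
    std::vector<std::string> splitCommas(const std::string &s) {
      std::vector<std::string> out;
      std::istringstream iss(s);
      std::string elem;
      while (std::getline(iss, elem, ','))
        out.push_back(elem);
      return out;
    }

    int main() {
      std::vector<std::string> v = splitCommas("main,foo,bar");
      for (size_t i = 0; i != v.size(); ++i)
        std::printf("%s\n", v[i].c_str());
      return 0;
    }
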
+ void MFRenderingOptions::processOptions() {
+ if (!renderingOptionsProcessed) {
+ processFuncNames();
+ processRegClassNames();
+ processIntervalNumbers();
+ renderingOptionsProcessed = true;
+ }
+ }
+
+ void MFRenderingOptions::processFuncNames() {
+ if (machineFuncsToRender == "*") {
+ renderAllMFs = true;
+ } else {
+ splitComaSeperatedList(machineFuncsToRender,
+ std::inserter(mfNamesToRender,
+ mfNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processRegClassNames() {
+ if (pressureClasses == "*") {
+ renderAllClasses = true;
+ } else {
+ splitComaSeperatedList(pressureClasses,
+ std::inserter(classNamesToRender,
+ classNamesToRender.begin()));
+ }
+ }
+
+ void MFRenderingOptions::processIntervalNumbers() {
+ std::set<std::string> intervalRanges;
+ splitComaSeperatedList(showIntervals,
+ std::inserter(intervalRanges,
+ intervalRanges.begin()));
+ std::for_each(intervalRanges.begin(), intervalRanges.end(),
+ processIntervalRange);
+ }
+
+ void MFRenderingOptions::processIntervalRange(
+ const std::string &intervalRangeStr) {
+ if (intervalRangeStr == "*") {
+ intervalTypesToRender |= All;
+ } else if (intervalRangeStr == "virt-nospills*") {
+ intervalTypesToRender |= VirtNoSpills;
+ } else if (intervalRangeStr == "spills*") {
+ intervalTypesToRender |= VirtSpills;
+ } else if (intervalRangeStr == "virt*") {
+ intervalTypesToRender |= AllVirt;
+ } else if (intervalRangeStr == "phys*") {
+ intervalTypesToRender |= AllPhys;
+ } else {
+ std::istringstream iss(intervalRangeStr);
+ unsigned reg1, reg2;
+ if ((iss >> reg1 >> std::ws)) {
+ if (iss.eof()) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1));
+ } else {
+ char c;
+ iss >> c;
+ if (c == '-' && (iss >> reg2)) {
+ intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1));
+ } else {
+ dbgs() << "Warning: Invalid interval range \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ } else {
+ dbgs() << "Warning: Invalid interval number \""
+ << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n";
+ }
+ }
+ }
+
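
processIntervalRange accepts either a bare register number ("42") or a dash range ("40-45") and records half-open [first, second) pairs. The parse, trimmed to a standalone function using the same istringstream pattern:

    #include <cstdio>
    #include <sstream>
    #include <string>
    #include <utility>

    // Parse "N" into [N, N+1) or "N-M" into [N, M+1); return false on junk.
    bool parseRange(const std::string &s, std::pair<unsigned, unsigned> &out) {
      std::istringstream iss(s);
      unsigned r1 = 0, r2 = 0;
      char c = 0;
      if (!(iss >> r1 >> std::ws))
        return false;
      if (iss.eof()) {
        out = std::make_pair(r1, r1 + 1);        // single register
        return true;
      }
      if ((iss >> c) && c == '-' && (iss >> r2)) {
        out = std::make_pair(r1, r2 + 1);        // inclusive range
        return true;
      }
      return false;
    }

    int main() {
      std::pair<unsigned, unsigned> r;
      if (parseRange("40-45", r))
        std::printf("[%u, %u)\n", r.first, r.second); // prints [40, 46)
      return 0;
    }
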
+ void MFRenderingOptions::setup(MachineFunction *mf,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis,
+ const RenderMachineFunction *rmf) {
+ this->mf = mf;
+ this->tri = tri;
+ this->lis = lis;
+ this->rmf = rmf;
+
+ clear();
+ }
+
+ void MFRenderingOptions::clear() {
+ regClassesTranslatedToCurrentFunction = false;
+ regClassSet.clear();
+
+ intervalsTranslatedToCurrentFunction = false;
+ intervalSet.clear();
+ }
+
+ void MFRenderingOptions::resetRenderSpecificOptions() {
+ intervalSet.clear();
+ intervalsTranslatedToCurrentFunction = false;
+ }
+
+ bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const {
+ processOptions();
+
+ return (renderAllMFs ||
+ mfNamesToRender.find(mf->getFunction()->getName()) !=
+ mfNamesToRender.end());
+ }
+
+ const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{
+ translateRegClassNamesToCurrentFunction();
+ return regClassSet;
+ }
+
+ const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const {
+ translateIntervalNumbersToCurrentFunction();
+ return intervalSet;
+ }
+
+ bool MFRenderingOptions::renderEmptyIndexes() const {
+ return showEmptyIndexes;
+ }
+
+ bool MFRenderingOptions::fancyVerticals() const {
+ return useFancyVerticals;
+ }
+
+ void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const {
+ if (!regClassesTranslatedToCurrentFunction) {
+ processOptions();
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ if (renderAllClasses ||
+ classNamesToRender.find(trc->getName()) !=
+ classNamesToRender.end()) {
+ regClassSet.insert(trc);
+ }
+ }
+ regClassesTranslatedToCurrentFunction = true;
+ }
+ }
+
+ void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const {
+ if (!intervalsTranslatedToCurrentFunction) {
+ processOptions();
+
+ // If we're not just doing explicit then do a copy over all matching
+ // types.
+ if (intervalTypesToRender != ExplicitOnly) {
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (filterEmpty && li->empty())
+ continue;
+
+ if ((TargetRegisterInfo::isPhysicalRegister(li->reg) &&
+ (intervalTypesToRender & AllPhys))) {
+ intervalSet.insert(li);
+ } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) {
+ if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) ||
+ ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) {
+ intervalSet.insert(li);
+ }
+ }
+ }
+ }
+
+ // If we need to process the explicit list...
+ if (intervalTypesToRender != All) {
+ for (std::set<std::pair<unsigned, unsigned> >::const_iterator
+ regRangeItr = intervalNumsToRender.begin(),
+ regRangeEnd = intervalNumsToRender.end();
+ regRangeItr != regRangeEnd; ++regRangeItr) {
+ const std::pair<unsigned, unsigned> &range = *regRangeItr;
+ for (unsigned reg = range.first; reg != range.second; ++reg) {
+ if (lis->hasInterval(reg)) {
+ intervalSet.insert(&lis->getInterval(reg));
+ }
+ }
+ }
+ }
+
+ intervalsTranslatedToCurrentFunction = true;
+ }
+ }
+
+ // ---------- TargetRegisterExtraInformation implementation ----------
+
+ TargetRegisterExtraInfo::TargetRegisterExtraInfo()
+ : mapsPopulated(false) {
+ }
+
+ void TargetRegisterExtraInfo::setup(MachineFunction *mf,
+ MachineRegisterInfo *mri,
+ const TargetRegisterInfo *tri,
+ LiveIntervals *lis) {
+ this->mf = mf;
+ this->mri = mri;
+ this->tri = tri;
+ this->lis = lis;
+ }
+
+ void TargetRegisterExtraInfo::reset() {
+ if (!mapsPopulated) {
+ initWorst();
+ //initBounds();
+ initCapacity();
+ mapsPopulated = true;
+ }
+
+ resetPressureAndLiveStates();
+ }
+
+ void TargetRegisterExtraInfo::clear() {
+ prWorst.clear();
+ vrWorst.clear();
+ capacityMap.clear();
+ pressureMap.clear();
+ //liveStatesMap.clear();
+ mapsPopulated = false;
+ }
+
+ void TargetRegisterExtraInfo::initWorst() {
+ assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() &&
+ "Worst map already initialised?");
+
+ // Start with the physical registers.
+ for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) {
+ WorstMapLine &pregLine = prWorst[preg];
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ unsigned numOverlaps = 0;
+ for (TargetRegisterClass::iterator rItr = trc->begin(),
+ rEnd = trc->end();
+ rItr != rEnd; ++rItr) {
+ unsigned trcPReg = *rItr;
+ if (tri->regsOverlap(preg, trcPReg))
+ ++numOverlaps;
+ }
+
+ pregLine[trc] = numOverlaps;
+ }
+ }
+
+ // Now the register classes.
+ for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rc1Itr != rcEnd; ++rc1Itr) {
+ const TargetRegisterClass *trc1 = *rc1Itr;
+ WorstMapLine &classLine = vrWorst[trc1];
+
+ for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin();
+ rc2Itr != rcEnd; ++rc2Itr) {
+ const TargetRegisterClass *trc2 = *rc2Itr;
+
+ unsigned worst = 0;
+
+ for (TargetRegisterClass::iterator trc1Itr = trc1->begin(),
+ trc1End = trc1->end();
+ trc1Itr != trc1End; ++trc1Itr) {
+ unsigned trc1Reg = *trc1Itr;
+ unsigned trc1RegWorst = 0;
+
+ for (TargetRegisterClass::iterator trc2Itr = trc2->begin(),
+ trc2End = trc2->end();
+ trc2Itr != trc2End; ++trc2Itr) {
+ unsigned trc2Reg = *trc2Itr;
+ if (tri->regsOverlap(trc1Reg, trc2Reg))
+ ++trc1RegWorst;
+ }
+ if (trc1RegWorst > worst) {
+ worst = trc1RegWorst;
+ }
+ }
+
+ if (worst != 0) {
+ classLine[trc2] = worst;
+ }
+ }
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getWorst(
+ unsigned reg,
+ const TargetRegisterClass *trc) const {
+ const WorstMapLine *wml = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+ PRWorstMap::const_iterator prwItr = prWorst.find(reg);
+ assert(prwItr != prWorst.end() && "Missing prWorst entry.");
+ wml = &prwItr->second;
+ } else {
+ const TargetRegisterClass *regTRC = mri->getRegClass(reg);
+ VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC);
+ assert(vrwItr != vrWorst.end() && "Missing vrWorst entry.");
+ wml = &vrwItr->second;
+ }
+
+ WorstMapLine::const_iterator wmlItr = wml->find(trc);
+ if (wmlItr == wml->end())
+ return 0;
+
+ return wmlItr->second;
+ }
+
+ void TargetRegisterExtraInfo::initCapacity() {
+ assert(!mapsPopulated && capacityMap.empty() &&
+ "Capacity map already initialised?");
+
+ for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ unsigned capacity = std::distance(trc->allocation_order_begin(*mf),
+ trc->allocation_order_end(*mf));
+
+ if (capacity != 0)
+ capacityMap[trc] = capacity;
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getCapacity(
+ const TargetRegisterClass *trc) const {
+ CapacityMap::const_iterator cmItr = capacityMap.find(trc);
+ assert(cmItr != capacityMap.end() &&
+ "vreg with unallocable register class");
+ return cmItr->second;
+ }
+
+ void TargetRegisterExtraInfo::resetPressureAndLiveStates() {
+ pressureMap.clear();
+ //liveStatesMap.clear();
+
+ // Iterate over all live intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ const TargetRegisterClass *liTRC;
+
+ if (TargetRegisterInfo::isPhysicalRegister(li->reg))
+ continue;
+
+ liTRC = mri->getRegClass(li->reg);
+
+ // For all ranges in the current interval.
+ for (LiveInterval::iterator lrItr = li->begin(),
+ lrEnd = li->end();
+ lrItr != lrEnd; ++lrItr) {
+ LiveRange *lr = &*lrItr;
+
+ // For all slots in the current range.
+ for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) {
+
+ // Record increased pressure at index for all overlapping classes.
+ for (TargetRegisterInfo::regclass_iterator
+ rcItr = tri->regclass_begin(),
+ rcEnd = tri->regclass_end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+
+ if (trc->allocation_order_begin(*mf) ==
+ trc->allocation_order_end(*mf))
+ continue;
+
+ unsigned worstAtI = getWorst(li->reg, trc);
+
+ if (worstAtI != 0) {
+ pressureMap[i][trc] += worstAtI;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ unsigned TargetRegisterExtraInfo::getPressureAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ PressureMap::const_iterator pmItr = pressureMap.find(i);
+ if (pmItr == pressureMap.end())
+ return 0;
+ const PressureMapLine &pmLine = pmItr->second;
+ PressureMapLine::const_iterator pmlItr = pmLine.find(trc);
+ if (pmlItr == pmLine.end())
+ return 0;
+ return pmlItr->second;
+ }
+
+ bool TargetRegisterExtraInfo::classOverCapacityAtSlot(
+ const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ return (getPressureAtSlot(trc, i) > getCapacity(trc));
+ }
+
+ // ---------- MachineFunctionRenderer implementation ----------
+
+ void RenderMachineFunction::Spacer::print(raw_ostream &os) const {
+ if (!prettyHTML)
+ return;
+ for (unsigned i = 0; i < ns; ++i) {
+ os << " ";
+ }
+ }
+
+ RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const {
+ return Spacer(ns);
+ }
+
+ raw_ostream& operator<<(raw_ostream &os,
+ const RenderMachineFunction::Spacer &s) {
+ s.print(os);
+ return os;
+ }
+
+ template <typename Iterator>
+ std::string RenderMachineFunction::escapeChars(Iterator sBegin,
+ Iterator sEnd) const {
+ std::string r;
+
+ for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) {
+ char c = *sItr;
+
+ switch (c) {
+ case '<': r.append("&lt;"); break;
+ case '>': r.append("&gt;"); break;
+ case '&': r.append("&amp;"); break;
+ case ' ': r.append("&nbsp;"); break;
+ case '\"': r.append("&quot;"); break;
+ default: r.push_back(c); break;
+ }
+ }
+
+ return r;
+ }
+
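
escapeChars rewrites the five characters that are significant inside the generated HTML ('<', '>', '&', space, and double quote) as entities. A standalone copy of the same table, run over a sample string:

    #include <cstdio>
    #include <string>

    // Escape HTML-significant characters, as escapeChars does above.
    std::string escape(const std::string &s) {
      std::string r;
      for (size_t i = 0; i != s.size(); ++i) {
        switch (s[i]) {
        case '<': r += "&lt;"; break;
        case '>': r += "&gt;"; break;
        case '&': r += "&amp;"; break;
        case ' ': r += "&nbsp;"; break;
        case '\"': r += "&quot;"; break;
        default: r += s[i]; break;
        }
      }
      return r;
    }

    int main() {
      std::printf("%s\n", escape("a < b & c").c_str());
      // prints: a&nbsp;&lt;&nbsp;b&nbsp;&amp;&nbsp;c
      return 0;
    }
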
+ RenderMachineFunction::LiveState
+ RenderMachineFunction::getLiveStateAt(const LiveInterval *li,
+ SlotIndex i) const {
+ const MachineInstr *mi = sis->getInstructionFromIndex(i);
+
+ // For uses/defs, recorded use/def indexes override current liveness and
+ // instruction operands (only for the interval which records the indexes).
+ if (i.isUse() || i.isDef()) {
+ UseDefs::const_iterator udItr = useDefs.find(li);
+ if (udItr != useDefs.end()) {
+ const SlotSet &slotSet = udItr->second;
+ if (slotSet.count(i)) {
+ if (i.isUse()) {
+ return Used;
+ }
+ // else
+ return Defined;
+ }
+ }
+ }
+
+ // If the slot is a load/store, or there's no info in the use/def set then
+ // use liveness and instruction operand info.
+ if (li->liveAt(i)) {
+
+ if (mi == 0) {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ } else {
+ if (i.isDef() && mi->definesRegister(li->reg, tri)) {
+ return Defined;
+ } else if (i.isUse() && mi->readsRegister(li->reg)) {
+ return Used;
+ } else {
+ if (vrm == 0 ||
+ (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) {
+ return AliveReg;
+ } else {
+ return AliveStack;
+ }
+ }
+ }
+ }
+ return Dead;
+ }
+
+ RenderMachineFunction::PressureState
+ RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc,
+ SlotIndex i) const {
+ if (trei.getPressureAtSlot(trc, i) == 0) {
+ return Zero;
+ } else if (trei.classOverCapacityAtSlot(trc, i)){
+ return High;
+ }
+ return Low;
+ }
+
+ /// \brief Render a machine instruction.
+ void RenderMachineFunction::renderMachineInstr(raw_ostream &os,
+ const MachineInstr *mi) const {
+ std::string s;
+ raw_string_ostream oss(s);
+ oss << *mi;
+
+ os << escapeChars(oss.str());
+ }
+
+ template <typename T>
+ void RenderMachineFunction::renderVertical(const Spacer &indent,
+ raw_ostream &os,
+ const T &t) const {
+ if (ro.fancyVerticals()) {
+ os << indent << "<object\n"
+ << indent + s(2) << "class=\"obj\"\n"
+ << indent + s(2) << "type=\"image/svg+xml\"\n"
+ << indent + s(2) << "width=\"14px\"\n"
+ << indent + s(2) << "height=\"55px\"\n"
+ << indent + s(2) << "data=\"data:image/svg+xml,\n"
+ << indent + s(4) << "<svg xmlns='http://www.w3.org/2000/svg'>\n"
+ << indent + s(6) << "<text x='-55' y='10' "
+ "font-family='Courier' font-size='12' "
+ "transform='rotate(-90)' "
+ "text-rendering='optimizeSpeed' "
+ "fill='#000'>" << t << "</text>\n"
+ << indent + s(4) << "</svg>\">\n"
+ << indent << "</object>\n";
+ } else {
+ std::ostringstream oss;
+ oss << t;
+ std::string tStr(oss.str());
+
+ os << indent;
+ for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end();
+ tStrItr != tStrEnd; ++tStrItr) {
+ os << *tStrItr << "<br/>";
+ }
+ os << "\n";
+ }
+ }
+
+ void RenderMachineFunction::insertCSS(const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<style type=\"text/css\">\n"
+ << indent + s(2) << "body { font-color: black; }\n"
+ << indent + s(2) << "table.code td { font-family: monospace; "
+ "border-width: 0px; border-style: solid; "
+ "border-bottom: 1px solid #dddddd; white-space: nowrap; }\n"
+ << indent + s(2) << "table.code td.p-z { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.p-l { background-color: #00ff00; }\n"
+ << indent + s(2) << "table.code td.p-h { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-n { background-color: #ffffff; }\n"
+ << indent + s(2) << "table.code td.l-d { background-color: #ff0000; }\n"
+ << indent + s(2) << "table.code td.l-u { background-color: #ffff00; }\n"
+ << indent + s(2) << "table.code td.l-r { background-color: #000000; }\n"
+ << indent + s(2) << "table.code td.l-s { background-color: #770000; }\n"
+ << indent + s(2) << "table.code th { border-width: 0px; "
+ "border-style: solid; }\n"
+ << indent << "</style>\n";
+ }
+
+ void RenderMachineFunction::renderFunctionSummary(
+ const Spacer &indent, raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << indent << "<h1>Function: " << mf->getFunction()->getName()
+ << "</h1>\n"
+ << indent << "<h2>Rendering context: " << renderContextStr << "</h2>\n";
+ }
+
+
+ void RenderMachineFunction::renderPressureTableLegend(
+ const Spacer &indent,
+ raw_ostream &os) const {
+ os << indent << "<h2>Rendering Pressure Legend:</h2>\n"
+ << indent << "<table class=\"code\">\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<th>Pressure</th><th>Description</th>"
+ "<th>Appearance</th>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>No Pressure</td>"
+ "<td>No physical registers of this class requested.</td>"
+ "<td class=\"p-z\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>Low Pressure</td>"
+ "<td>Sufficient physical registers to meet demand.</td>"
+ "<td class=\"p-l\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent + s(2) << "<tr>\n"
+ << indent + s(4) << "<td>High Pressure</td>"
+ "<td>Potentially insufficient physical registers to meet demand.</td>"
+ "<td class=\"p-h\">&nbsp;&nbsp;</td>\n"
+ << indent + s(2) << "</tr>\n"
+ << indent << "</table>\n";
+ }
+
+ template <typename CellType>
+ void RenderMachineFunction::renderCellsWithRLE(
+ const Spacer &indent, raw_ostream &os,
+ const std::pair<CellType, unsigned> &rleAccumulator,
+ const std::map<CellType, std::string> &cellTypeStrs) const {
+
+ if (rleAccumulator.second == 0)
+ return;
+
+ typename std::map<CellType, std::string>::const_iterator ctsItr =
+ cellTypeStrs.find(rleAccumulator.first);
+
+ assert(ctsItr != cellTypeStrs.end() && "No string for given cell type.");
+
+ os << indent + s(4) << "<td class=\"" << ctsItr->second << "\"";
+ if (rleAccumulator.second > 1)
+ os << " colspan=" << rleAccumulator.second;
+ os << "></td>\n";
+ }
+
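
renderCellsWithRLE is the flush half of a run-length split: the caller accumulates a run while the cell type stays the same and flushes on change, so a row of identical cells becomes one <td> with a colspan. The accumulate/flush pattern in isolation:

    #include <cstdio>
    #include <utility>

    // Flush one run: nothing for an empty run, a plain cell for a run of
    // one, and a colspan cell for anything longer.
    static void flush(const std::pair<char, unsigned> &run) {
      if (run.second == 0)
        return;
      if (run.second == 1)
        std::printf("<td class=\"%c\"></td>\n", run.first);
      else
        std::printf("<td class=\"%c\" colspan=%u></td>\n",
                    run.first, run.second);
    }

    int main() {
      // Pressure states for six columns: zero x3, low x1, high x2.
      const char cells[] = {'z', 'z', 'z', 'l', 'h', 'h'};
      std::pair<char, unsigned> run('\0', 0);
      for (size_t i = 0; i != sizeof(cells); ++i) {
        if (cells[i] == run.first)
          ++run.second;                  // extend the current run
        else {
          flush(run);                    // emit the finished run
          run = std::make_pair(cells[i], 1u);
        }
      }
      flush(run);                        // emit the final run
      return 0;
    }
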
+
+ void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent,
+ raw_ostream &os) const {
+
+ std::map<LiveState, std::string> lsStrs;
+ lsStrs[Dead] = "l-n";
+ lsStrs[Defined] = "l-d";
+ lsStrs[Used] = "l-u";
+ lsStrs[AliveReg] = "l-r";
+ lsStrs[AliveStack] = "l-s";
+
+ std::map<PressureState, std::string> psStrs;
+ psStrs[Zero] = "p-z";
+ psStrs[Low] = "p-l";
+ psStrs[High] = "p-h";
+
+ // Open the table...
+
+ os << indent << "<table cellpadding=0 cellspacing=0 class=\"code\">\n"
+ << indent + s(2) << "<tr>\n";
+
+ // Render the header row...
+
+ os << indent + s(4) << "<th>index</th>\n"
+ << indent + s(4) << "<th>instr</th>\n";
+
+ // Render class names if necessary...
+ if (!ro.regClasses().empty()) {
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, trc->getName());
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<th>&nbsp;&nbsp;</th>\n";
+
+ // Render interval numbers if necessary...
+ if (!ro.intervals().empty()) {
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+
+ const LiveInterval *li = *liItr;
+ os << indent + s(4) << "<th>\n";
+ renderVertical(indent + s(6), os, li->reg);
+ os << indent + s(4) << "</th>\n";
+ }
+ }
+
+ os << indent + s(2) << "</tr>\n";
+
+ // End header row, start with the data rows...
+
+ MachineInstr *mi = 0;
+
+ // Data rows:
+ for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex();
+ i = i.getNextSlot()) {
+
+ // Render the slot column.
+ os << indent + s(2) << "<tr height=6ex>\n";
+
+ // Render the code column.
+ if (i.isLoad()) {
+ MachineBasicBlock *mbb = sis->getMBBFromIndex(i);
+ mi = sis->getInstructionFromIndex(i);
+
+ if (i == sis->getMBBStartIdx(mbb) || mi != 0 ||
+ ro.renderEmptyIndexes()) {
+ os << indent + s(4) << "<td rowspan=4>" << i << "&nbsp;</td>\n"
+ << indent + s(4) << "<td rowspan=4>\n";
+
+ if (i == sis->getMBBStartIdx(mbb)) {
+ os << indent + s(6) << "BB#" << mbb->getNumber() << ":&nbsp;\n";
+ } else if (mi != 0) {
+ os << indent + s(6) << "&nbsp;&nbsp;";
+ renderMachineInstr(os, mi);
+ } else {
+ // Empty index - leave blank.
+ }
+ os << indent + s(4) << "</td>\n";
+ } else {
+ i = i.getStoreIndex(); // <- Will be incremented to the next index.
+ continue;
+ }
+ }
+
+ // Render the class columns.
+ if (!ro.regClasses().empty()) {
+ std::pair<PressureState, unsigned> psRLEAccumulator(Zero, 0);
+ for (MFRenderingOptions::RegClassSet::const_iterator
+ rcItr = ro.regClasses().begin(),
+ rcEnd = ro.regClasses().end();
+ rcItr != rcEnd; ++rcItr) {
+ const TargetRegisterClass *trc = *rcItr;
+ PressureState newPressure = getPressureStateAt(trc, i);
+
+ if (newPressure == psRLEAccumulator.first) {
+ ++psRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ psRLEAccumulator.first = newPressure;
+ psRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs);
+ }
+
+ // FIXME: Is there a nicer way to insert space between columns in HTML?
+ if (!ro.regClasses().empty() && !ro.intervals().empty())
+ os << indent + s(4) << "<td width=2em></td>\n";
+
+ if (!ro.intervals().empty()) {
+ std::pair<LiveState, unsigned> lsRLEAccumulator(Dead, 0);
+ for (MFRenderingOptions::IntervalSet::const_iterator
+ liItr = ro.intervals().begin(),
+ liEnd = ro.intervals().end();
+ liItr != liEnd; ++liItr) {
+ const LiveInterval *li = *liItr;
+ LiveState newLiveness = getLiveStateAt(li, i);
+
+ if (newLiveness == lsRLEAccumulator.first) {
+ ++lsRLEAccumulator.second;
+ } else {
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ lsRLEAccumulator.first = newLiveness;
+ lsRLEAccumulator.second = 1;
+ }
+ }
+ renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs);
+ }
+ os << indent + s(2) << "</tr>\n";
+ }
+
+ os << indent << "</table>\n";
+
+ if (!ro.regClasses().empty())
+ renderPressureTableLegend(indent, os);
+ }
+
+ void RenderMachineFunction::renderFunctionPage(
+ raw_ostream &os,
+ const char * const renderContextStr) const {
+ os << "<html>\n"
+ << s(2) << "<head>\n"
+ << s(4) << "<title>" << fqn << "</title>\n";
+
+ insertCSS(s(4), os);
+
+ os << s(2) << "<head>\n"
+ << s(2) << "<body >\n";
+
+ renderFunctionSummary(s(4), os, renderContextStr);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ //renderLiveIntervalInfoTable(" ", os);
+
+ os << s(4) << "<br/><br/><br/>\n";
+
+ renderCodeTablePlusPI(s(4), os);
+
+ os << s(2) << "</body>\n"
+ << "</html>\n";
+ }
+
+ void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ lis = &getAnalysis<LiveIntervals>();
+ sis = &getAnalysis<SlotIndexes>();
+
+ trei.setup(mf, mri, tri, lis);
+ ro.setup(mf, tri, lis, this);
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+ return false;
+ }
+
+ void RenderMachineFunction::releaseMemory() {
+ trei.clear();
+ ro.clear();
+ spillIntervals.clear();
+ spillFor.clear();
+ useDefs.clear();
+ }
+
+ void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ const MachineInstr *mi = &*rItr;
+ if (mi->readsRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex());
+ }
+ if (mi->definesRegister(li->reg)) {
+ useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex());
+ }
+ }
+ }
+
+ void RenderMachineFunction::rememberSpills(
+ const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills) {
+
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ for (std::vector<LiveInterval*>::const_iterator siItr = spills.begin(),
+ siEnd = spills.end();
+ siItr != siEnd; ++siItr) {
+ const LiveInterval *spill = *siItr;
+ spillIntervals[li].insert(spill);
+ spillFor[spill] = li;
+ }
+ }
+
+ bool RenderMachineFunction::isSpill(const LiveInterval *li) const {
+ SpillForMap::const_iterator sfItr = spillFor.find(li);
+ if (sfItr == spillFor.end())
+ return false;
+ return true;
+ }
+
+ void RenderMachineFunction::renderMachineFunction(
+ const char *renderContextStr,
+ const VirtRegMap *vrm,
+ const char *renderSuffix) {
+ if (!ro.shouldRenderCurrentMachineFunction())
+ return;
+
+ this->vrm = vrm;
+ trei.reset();
+
+ std::string rpFileName(mf->getFunction()->getName().str() +
+ (renderSuffix ? renderSuffix : "") +
+ outputFileSuffix);
+
+ std::string errMsg;
+ raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary);
+
+ renderFunctionPage(outFile, renderContextStr);
+
+ ro.resetRenderSpecificOptions();
+ }
+
+ std::string RenderMachineFunction::escapeChars(const std::string &s) const {
+ return escapeChars(s.begin(), s.end());
+ }
+
+}
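
With these pieces in place the renderer is driven entirely by the cl::opt flags defined at the top of the file: -rmf-funcs selects functions (or "*" for all), -rmf-classes and -rmf-intervals choose which pressure columns and intervals to show, and -rmf-file-suffix names the output file. Hooked in through the PBQP allocator above, an invocation along the lines of llc -regalloc=pbqp -rmf-funcs=main would emit main.html after allocation; the exact driver spelling depends on how the hosting tool registers the pass.
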
diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h
new file mode 100644
index 0000000000000..8d56a8292ac59
--- /dev/null
+++ b/lib/CodeGen/RenderMachineFunction.h
@@ -0,0 +1,336 @@
+//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
+#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+
+namespace llvm {
+
+ class LiveInterval;
+ class LiveIntervals;
+ class MachineInstr;
+ class MachineRegisterInfo;
+ class RenderMachineFunction;
+ class TargetRegisterClass;
+ class TargetRegisterInfo;
+ class VirtRegMap;
+ class raw_ostream;
+
+ /// \brief Helper class to process rendering options. Tries to be as lazy as
+ /// possible.
+ class MFRenderingOptions {
+ public:
+
+ struct RegClassComp {
+ bool operator()(const TargetRegisterClass *trc1,
+ const TargetRegisterClass *trc2) const {
+ std::string trc1Name(trc1->getName()), trc2Name(trc2->getName());
+ return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(),
+ trc2Name.begin(), trc2Name.end());
+ }
+ };
+
+ typedef std::set<const TargetRegisterClass*, RegClassComp> RegClassSet;
+
+ struct IntervalComp {
+ bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
+ return li1->reg < li2->reg;
+ }
+ };
+
+ typedef std::set<const LiveInterval*, IntervalComp> IntervalSet;
+
+ /// Initialise the rendering options.
+ void setup(MachineFunction *mf, const TargetRegisterInfo *tri,
+ LiveIntervals *lis, const RenderMachineFunction *rmf);
+
+ /// Clear translations of options to the current function.
+ void clear();
+
+ /// Reset any options computed for this specific rendering.
+ void resetRenderSpecificOptions();
+
+ /// Should we render the current function.
+ bool shouldRenderCurrentMachineFunction() const;
+
+ /// Return the set of register classes to render pressure for.
+ const RegClassSet& regClasses() const;
+
+ /// Return the set of live intervals to render liveness for.
+ const IntervalSet& intervals() const;
+
+ /// Render indexes which are not associated with instructions / MBB starts.
+ bool renderEmptyIndexes() const;
+
+ /// Return whether or not to render using SVG for fancy vertical text.
+ bool fancyVerticals() const;
+
+ private:
+
+ static bool renderingOptionsProcessed;
+ static std::set<std::string> mfNamesToRender;
+ static bool renderAllMFs;
+
+ static std::set<std::string> classNamesToRender;
+ static bool renderAllClasses;
+
+ static std::set<std::pair<unsigned, unsigned> > intervalNumsToRender;
+ typedef enum { ExplicitOnly = 0,
+ AllPhys = 1,
+ VirtNoSpills = 2,
+ VirtSpills = 4,
+ AllVirt = 6,
+ All = 7 }
+ IntervalTypesToRender;
+ static unsigned intervalTypesToRender;
+
+ template <typename OutputItr>
+ static void splitComaSeperatedList(const std::string &s, OutputItr outItr);
+
+ static void processOptions();
+
+ static void processFuncNames();
+ static void processRegClassNames();
+ static void processIntervalNumbers();
+
+ static void processIntervalRange(const std::string &intervalRangeStr);
+
+ MachineFunction *mf;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+ const RenderMachineFunction *rmf;
+
+ mutable bool regClassesTranslatedToCurrentFunction;
+ mutable RegClassSet regClassSet;
+
+ mutable bool intervalsTranslatedToCurrentFunction;
+ mutable IntervalSet intervalSet;
+
+ void translateRegClassNamesToCurrentFunction() const;
+
+ void translateIntervalNumbersToCurrentFunction() const;
+ };
+
+ /// \brief Provide extra information about the physical and virtual registers
+ /// in the function being compiled.
+ class TargetRegisterExtraInfo {
+ public:
+ TargetRegisterExtraInfo();
+
+ /// \brief Set up TargetRegisterExtraInfo with pointers to necessary
+ /// sources of information.
+ void setup(MachineFunction *mf, MachineRegisterInfo *mri,
+ const TargetRegisterInfo *tri, LiveIntervals *lis);
+
+ /// \brief Recompute tables for changed function.
+ void reset();
+
+ /// \brief Free all tables in TargetRegisterExtraInfo.
+ void clear();
+
+ /// \brief Maximum number of registers from trc which alias reg.
+ unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const;
+
+ /// \brief Returns the number of allocable registers in trc.
+ unsigned getCapacity(const TargetRegisterClass *trc) const;
+
+ /// \brief Return the number of registers of class trc that may be
+ /// needed at slot i.
+ unsigned getPressureAtSlot(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ /// \brief Return true if the number of registers of type trc that may be
+ /// needed at slot i is greater than the capacity of trc.
+ bool classOverCapacityAtSlot(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ private:
+
+ MachineFunction *mf;
+ MachineRegisterInfo *mri;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> WorstMapLine;
+ typedef std::map<const TargetRegisterClass*, WorstMapLine> VRWorstMap;
+ VRWorstMap vrWorst;
+
+ typedef std::map<unsigned, WorstMapLine> PRWorstMap;
+ PRWorstMap prWorst;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> CapacityMap;
+ CapacityMap capacityMap;
+
+ typedef std::map<const TargetRegisterClass*, unsigned> PressureMapLine;
+ typedef std::map<SlotIndex, PressureMapLine> PressureMap;
+ PressureMap pressureMap;
+
+ bool mapsPopulated;
+
+ /// \brief Initialise the 'worst' table.
+ void initWorst();
+
+ /// \brief Initialise the 'capacity' table.
+ void initCapacity();
+
+ /// \brief Initialise/Reset the 'pressure' and live states tables.
+ void resetPressureAndLiveStates();
+ };
+
+ /// \brief Render MachineFunction objects and related information to an HTML
+ /// page.
+ class RenderMachineFunction : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ RenderMachineFunction() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+
+ virtual void releaseMemory();
+
+ void rememberUseDefs(const LiveInterval *li);
+
+ void rememberSpills(const LiveInterval *li,
+ const std::vector<LiveInterval*> &spills);
+
+ bool isSpill(const LiveInterval *li) const;
+
+ /// \brief Render this machine function to HTML.
+ ///
+ /// @param renderContextStr This parameter will be included at the top of
+ /// the HTML file to explain where (in the
+ /// codegen pipeline) this function was rendered
+ /// from. Set it to something like
+ /// "Pre-register-allocation".
+ /// @param vrm If non-null the VRM will be queried to determine
+ /// whether a virtual register was allocated to a
+ /// physical register or spilled.
+ /// @param renderSuffix This string will be appended to the function
+ /// name (before the output file suffix) to enable
+ /// multiple renderings of the same function.
+ void renderMachineFunction(const char *renderContextStr,
+ const VirtRegMap *vrm = 0,
+ const char *renderSuffix = 0);
+
+ private:
+ class Spacer;
+ friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s);
+
+ std::string fqn;
+
+ MachineFunction *mf;
+ MachineRegisterInfo *mri;
+ const TargetRegisterInfo *tri;
+ LiveIntervals *lis;
+ SlotIndexes *sis;
+ const VirtRegMap *vrm;
+
+ TargetRegisterExtraInfo trei;
+ MFRenderingOptions ro;
+
+
+
+ // Utilities.
+ typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState;
+ LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const;
+
+ typedef enum { Zero, Low, High } PressureState;
+ PressureState getPressureStateAt(const TargetRegisterClass *trc,
+ SlotIndex i) const;
+
+ typedef std::map<const LiveInterval*, std::set<const LiveInterval*> >
+ SpillIntervals;
+ SpillIntervals spillIntervals;
+
+ typedef std::map<const LiveInterval*, const LiveInterval*> SpillForMap;
+ SpillForMap spillFor;
+
+ typedef std::set<SlotIndex> SlotSet;
+ typedef std::map<const LiveInterval*, SlotSet> UseDefs;
+ UseDefs useDefs;
+
+ // ---------- Rendering methods ----------
+
+ /// For inserting spaces when pretty printing.
+ class Spacer {
+ public:
+ explicit Spacer(unsigned numSpaces) : ns(numSpaces) {}
+ Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); }
+ void print(raw_ostream &os) const;
+ private:
+ unsigned ns;
+ };
+
+ Spacer s(unsigned ns) const;
+
+ template <typename Iterator>
+ std::string escapeChars(Iterator sBegin, Iterator sEnd) const;
+
+ /// \brief Render a machine instruction.
+ void renderMachineInstr(raw_ostream &os,
+ const MachineInstr *mi) const;
+
+ /// \brief Render vertical text.
+ template <typename T>
+ void renderVertical(const Spacer &indent,
+ raw_ostream &os,
+ const T &t) const;
+
+ /// \brief Insert CSS layout info.
+ void insertCSS(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render a brief summary of the function (including rendering
+ /// context).
+ void renderFunctionSummary(const Spacer &indent,
+ raw_ostream &os,
+ const char * const renderContextStr) const;
+
+ /// \brief Render a legend for the pressure table.
+ void renderPressureTableLegend(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render a consecutive set of HTML cells of the same class using
+ /// the colspan attribute for run-length encoding.
+ template <typename CellType>
+ void renderCellsWithRLE(
+ const Spacer &indent, raw_ostream &os,
+ const std::pair<CellType, unsigned> &rleAccumulator,
+ const std::map<CellType, std::string> &cellTypeStrs) const;
+
+ /// \brief Render code listing, potentially with register pressure
+ /// and live intervals shown alongside.
+ void renderCodeTablePlusPI(const Spacer &indent,
+ raw_ostream &os) const;
+
+ /// \brief Render the HTML page representing the MachineFunction.
+ void renderFunctionPage(raw_ostream &os,
+ const char * const renderContextStr) const;
+
+ std::string escapeChars(const std::string &s) const;
+ };
+}
+
+#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */
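The renderCellsWithRLE helper declared above collapses consecutive table cells of the same class into a single cell via the colspan attribute. A minimal standalone sketch of that run-length encoding, using plain std::string cell classes in place of the header's templated CellType (all names below are illustrative, not LLVM API):

    #include <iostream>
    #include <string>
    #include <vector>

    // Emit one run of identical cells as a single <td> with a colspan
    // attribute, instead of repeating the cell once per column.
    static void emitRun(std::ostream &os, const std::string &cls, unsigned len) {
      os << "  <td class=\"" << cls << "\"";
      if (len > 1)
        os << " colspan=" << len;
      os << "></td>\n";
    }

    // Render a row of cell classes, run-length encoding adjacent duplicates.
    static void renderRowRLE(std::ostream &os,
                             const std::vector<std::string> &cells) {
      if (cells.empty())
        return;
      std::string cur = cells[0];
      unsigned len = 1;
      for (unsigned i = 1; i != cells.size(); ++i) {
        if (cells[i] == cur) {
          ++len;
        } else {
          emitRun(os, cur, len);   // Flush the finished run.
          cur = cells[i];
          len = 1;
        }
      }
      emitRun(os, cur, len);       // Flush the final run.
    }

    int main() {
      // "l-n" and "h-n" are made-up CSS class names for low/high pressure.
      std::vector<std::string> row;
      row.push_back("l-n"); row.push_back("l-n"); row.push_back("l-n");
      row.push_back("h-n"); row.push_back("h-n"); row.push_back("l-n");
      renderRowRLE(std::cout, row);   // Emits three <td> cells, not six.
    }

The point is that a run of equal-pressure columns costs one <td> element rather than one per slot, which keeps the generated HTML page manageable for long functions.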
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 09202f84cb29d..ea93dd5c66633 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -32,7 +32,8 @@ using namespace llvm;
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), LoopRegs(MLI, MDT) {
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()),
+ Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
MFI = mf.getFrameInfo();
DbgValueVec.clear();
}
@@ -159,8 +160,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
// Keep track of dangling debug references to registers.
- std::pair<MachineInstr*, unsigned>
- DanglingDebugValue[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<std::pair<MachineInstr*, unsigned> >
+ DanglingDebugValue(TRI->getNumRegs(),
+ std::make_pair(static_cast<MachineInstr*>(0), 0));
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
@@ -172,7 +174,6 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
DbgValueVec.clear();
- std::memset(DanglingDebugValue, 0, sizeof(DanglingDebugValue));
// Walk the list of instructions, from bottom moving up.
for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index d90659bb163ef..c8f543f7146dd 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -106,8 +106,8 @@ namespace llvm {
/// are as we iterate upward through the instructions. This is allocated
/// here instead of inside BuildSchedGraph to avoid the need for it to be
/// initialized and destructed for each block.
- std::vector<SUnit *> Defs[TargetRegisterInfo::FirstVirtualRegister];
- std::vector<SUnit *> Uses[TargetRegisterInfo::FirstVirtualRegister];
+ std::vector<std::vector<SUnit *> > Defs;
+ std::vector<std::vector<SUnit *> > Uses;
/// DbgValueVec - Remember DBG_VALUEs that refer to a particular
/// register.
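Both ScheduleDAGInstrs hunks above replace fixed-size arrays dimensioned by TargetRegisterInfo::FirstVirtualRegister with vectors sized once from TRI->getNumRegs(), so the tables match the target's actual physical register count. A rough model of the pattern, with placeholder types standing in for SUnit and the scheduler class (not LLVM types):

    #include <vector>

    struct SUnitStub {};                 // Stands in for llvm::SUnit.

    class SchedulerState {
      // One def list and one use list per physical register. The outer
      // tables are sized once at construction and reused across basic
      // blocks by clearing only the inner vectors, which preserves the
      // rationale in the comment above: no per-block construction and
      // destruction of the whole table.
      std::vector<std::vector<SUnitStub*> > Defs;
      std::vector<std::vector<SUnitStub*> > Uses;

    public:
      explicit SchedulerState(unsigned NumPhysRegs)
        : Defs(NumPhysRegs), Uses(NumPhysRegs) {}

      void clearForNewBlock() {
        for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
          Defs[i].clear();
          Uses[i].clear();
        }
      }
    };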
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e671752464572..c9c4d91e97361 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4489,6 +4489,16 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// If this is a conversion of N elements of one type to N elements of another
// type, convert each element. This handles FP<->INT cases.
if (SrcBitSize == DstBitSize) {
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+
+ // Due to the FP element handling below calling this routine recursively,
+ // we can end up with a scalar-to-vector node here.
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ DstEltVT, BV->getOperand(0)));
+
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDValue Op = BV->getOperand(i);
@@ -4500,8 +4510,6 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
- EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
- BV->getValueType(0).getVectorNumElements());
return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
&Ops[0], Ops.size());
}
@@ -5790,7 +5798,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
SDValue N0 = Value.getOperand(0);
- if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ Chain == SDValue(N0.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(N0);
if (LD->getBasePtr() != Ptr)
return SDValue();
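The first DAGCombiner hunk folds a same-width BUILD_VECTOR bit-convert by converting each element individually, now guarding against the recursive FP-element handling producing a SCALAR_TO_VECTOR node. The core idea, per-element reinterpretation between same-width types, sketched with memcpy on plain floats and ints rather than SelectionDAG nodes (assuming a 32-bit float; a sketch, not the combiner itself):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Reinterpret each 32-bit float element as a 32-bit integer. This mirrors
    // bitcasting a v4f32 BUILD_VECTOR to v4i32 one element at a time: the
    // element count and per-element width are unchanged, only the type is.
    static std::vector<uint32_t> bitcastElements(const std::vector<float> &in) {
      std::vector<uint32_t> out(in.size());
      for (size_t i = 0; i != in.size(); ++i)
        std::memcpy(&out[i], &in[i], sizeof(uint32_t)); // same-width reinterpret
      return out;
    }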
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index decaa769e99fe..a4eed71e65c01 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -94,7 +94,7 @@ bool FastISel::hasTrivialKill(const Value *V) const {
!(I->getOpcode() == Instruction::BitCast ||
I->getOpcode() == Instruction::PtrToInt ||
I->getOpcode() == Instruction::IntToPtr) &&
- cast<Instruction>(I->use_begin())->getParent() == I->getParent();
+ cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
}
unsigned FastISel::getRegForValue(const Value *V) {
@@ -146,7 +146,7 @@ unsigned FastISel::getRegForValue(const Value *V) {
return Reg;
}
-/// materializeRegForValue - Helper for getRegForVale. This function is
+/// materializeRegForValue - Helper for getRegForValue. This function is
/// called when the value isn't already available in a register and must
/// be materialized with new instructions.
unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
@@ -276,6 +276,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
void FastISel::recomputeInsertPt() {
if (getLastLocalValue()) {
FuncInfo.InsertPt = getLastLocalValue();
+ FuncInfo.MBB = FuncInfo.InsertPt->getParent();
++FuncInfo.InsertPt;
} else
FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
@@ -472,17 +473,7 @@ bool FastISel::SelectCall(const User *I) {
return true;
const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
// Don't handle byval struct arguments or VLAs, for example.
- // Note that if we have a byval struct argument, fast ISel is turned off;
- // those are handled in SelectionDAGBuilder.
- if (AI) {
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
- if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs.
- int FI = SI->second;
- if (!DI->getDebugLoc().isUnknown())
- FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(),
- FI, DI->getDebugLoc());
- } else
+ if (!AI)
// Building the map above is target independent. Generating DBG_VALUE
// inline is target dependent; do this now.
(void)TargetSelectInstruction(cast<Instruction>(I));
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 928e1ecd4cf4e..5ef6404ee5d6b 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -20,6 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -111,17 +112,56 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+
+ // The object may need to be placed onto the stack near the stack
+ // protector if one exists. Determine here if this object is a suitable
+ // candidate, i.e., one that would trigger the creation of a stack protector.
+ bool MayNeedSP =
+ (AI->isArrayAllocation() ||
+ (TySize > 8 && isa<ArrayType>(Ty) &&
+ cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
}
for (; BB != EB; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Mark values used outside their block as exported, by allocating
+ // a virtual register for them.
if (isUsedOutsideOfDefiningBlock(I))
if (!isa<AllocaInst>(I) ||
!StaticAllocaMap.count(cast<AllocaInst>(I)))
InitializeRegForValue(I);
+ // Collect llvm.dbg.declare information. This is done now instead of
+ // during the initial isel pass through the IR so that it is done
+ // in a predictable order.
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ if (MMI.hasDebugInfo() &&
+ DIVariable(DI->getVariable()).Verify() &&
+ !DI->getDebugLoc().isUnknown()) {
+ // Don't handle byval struct arguments or VLAs, for example.
+ // Non-byval arguments are handled here (they refer to the stack
+ // temporary alloca at this point).
+ const Value *Address = DI->getAddress();
+ if (Address) {
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ DenseMap<const AllocaInst *, int>::iterator SI =
+ StaticAllocaMap.find(AI);
+ if (SI != StaticAllocaMap.end()) { // Check for VLAs.
+ int FI = SI->second;
+ MMI.setVariableDbgInfo(DI->getVariable(),
+ FI, DI->getDebugLoc());
+ }
+ }
+ }
+ }
+ }
+ }
+
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
// also creates the initial PHI MachineInstrs, though none of the input
// operands are populated.
@@ -181,6 +221,7 @@ void FunctionLoweringInfo::clear() {
#endif
LiveOutRegInfo.clear();
ArgDbgValues.clear();
+ ByValArgFrameIndexMap.clear();
RegFixups.clear();
}
@@ -214,6 +255,28 @@ unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
return FirstReg;
}
+/// setByValArgumentFrameIndex - Record the frame index for the byval
+/// argument. This overrides any previous frame index entry for this
+/// argument.
+void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A,
+ int FI) {
+ assert (A->hasByValAttr() && "Argument does not have byval attribute!");
+ ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getByValArgumentFrameIndex - Get the frame index for the byval argument.
+/// If the argument does not have an assigned frame index, 0 is
+/// returned.
+int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) {
+ assert (A->hasByValAttr() && "Argument does not have byval attribute!");
+ DenseMap<const Argument *, int>::iterator I =
+ ByValArgFrameIndexMap.find(A);
+ if (I != ByValArgFrameIndexMap.end())
+ return I->second;
+ DEBUG(dbgs() << "Argument does not have assigned frame index!");
+ return 0;
+}
+
/// AddCatchInfo - Extract the personality and type infos from an eh.selector
/// call, and add them to the specified machine basic block.
void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
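The MayNeedSP flag computed above marks allocas that could themselves trigger stack protector insertion, so the frame layout can place them next to the protector. The predicate restated as a standalone function, with the IR type tests replaced by explicit parameters (the name and signature here are illustrative):

    #include <cstdint>

    // An alloca is a stack-protector candidate if it is a dynamic array
    // allocation, or a static array of i8 larger than 8 bytes (the classic
    // "char buffer" that stack-smashing attacks target).
    static bool mayNeedStackProtector(bool isArrayAllocation,
                                      uint64_t sizeInBytes,
                                      bool isByteArray) {
      return isArrayAllocation || (sizeInBytes > 8 && isByteArray);
    }

For example, a local char buf[16] is a candidate (a static i8 array larger than 8 bytes), while a lone int is not.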
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7a47da4ec52ef..2981cd3f1cabd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -100,8 +100,7 @@ public:
/// it is already legal or we need to expand it into multiple registers of
/// smaller integer type, or we need to promote it to a larger type.
LegalizeAction getTypeAction(EVT VT) const {
- return
- (LegalizeAction)ValueTypeActions.getTypeAction(*DAG.getContext(), VT);
+ return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
}
/// isTypeLegal - Return true if this type is legal on this target.
@@ -1314,21 +1313,30 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
break;
case TargetLowering::Expand:
- // f64 = EXTLOAD f32 should expand to LOAD, FP_EXTEND
- // f128 = EXTLOAD {f32,f64} too
- if ((SrcVT == MVT::f32 && (Node->getValueType(0) == MVT::f64 ||
- Node->getValueType(0) == MVT::f128)) ||
- (SrcVT == MVT::f64 && Node->getValueType(0) == MVT::f128)) {
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
LD->getSrcValueOffset(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
- Result = DAG.getNode(ISD::FP_EXTEND, dl,
- Node->getValueType(0), Load);
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
+ break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
+ }
+ Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
assert(ExtType != ISD::EXTLOAD &&
"EXTLOAD should always be supported!");
// Turn the unsupported load into an EXTLOAD followed by an explicit
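The rewritten Expand path above turns an unsupported extending load of a legal source type into a plain load plus an explicit extend, picking the extend opcode from the load's extension kind. The selection logic in isolation, with local enums standing in for the ISD opcodes:

    #include <cassert>

    enum ExtKind  { ExtLoad, SExtLoad, ZExtLoad };
    enum ExtendOp { FPExtend, AnyExtend, SignExtend, ZeroExtend };

    // EXTLOAD leaves the high bits unspecified, so it becomes FP_EXTEND for
    // floating-point sources and ANY_EXTEND for integers; SEXTLOAD and
    // ZEXTLOAD map directly to their sign/zero extend counterparts.
    static ExtendOp chooseExtendOp(ExtKind K, bool SrcIsFloatingPoint) {
      switch (K) {
      case ExtLoad:  return SrcIsFloatingPoint ? FPExtend : AnyExtend;
      case SExtLoad: return SignExtend;
      case ZExtLoad: return ZeroExtend;
      }
      assert(false && "Unexpected extend load type!");
      return AnyExtend;
    }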
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b94ea9a3a9afb..f8c5890719219 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -234,8 +234,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
// The pair element type may be legal, or may not promote to the same type as
// the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases.
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
- TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
- JoinIntegers(N->getOperand(0), N->getOperand(1)));
+ TLI.getTypeToTransformTo(*DAG.getContext(),
+ N->getValueType(0)), JoinIntegers(N->getOperand(0),
+ N->getOperand(1)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
@@ -245,7 +246,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
// Zero extend things like i1, sign extend everything else. It shouldn't
// matter in theory which one we pick, but this tends to give better code?
unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- SDValue Result = DAG.getNode(Opc, dl, TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+ SDValue Result = DAG.getNode(Opc, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT),
SDValue(N, 0));
assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
return Result;
@@ -310,8 +312,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// If we're promoting a UINT to a larger size and the larger FP_TO_UINT is
// not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT
- // and SINT conversions are Custom, there is no way to tell which is preferable.
- // We choose SINT because that's the right thing on PPC.)
+ // and SINT conversions are Custom, there is no way to tell which is
+ // preferable. We choose SINT because that's the right thing on PPC.)
if (N->getOpcode() == ISD::FP_TO_UINT &&
!TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) &&
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
@@ -1030,7 +1032,7 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
Hi = InL;
} else if (Amt == 1 &&
TLI.isOperationLegalOrCustom(ISD::ADDC,
- TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
+ TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
// Emit this X << 1 as X+X.
SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
SDValue LoOps[2] = { InL, InL };
@@ -1926,7 +1928,8 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
unsigned ExcessBits =
EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
- DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), ExcessBits)));
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits)));
}
}
@@ -2046,7 +2049,8 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
unsigned ExcessBits =
Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getZeroExtendInReg(Hi, dl,
- EVT::getIntegerVT(*DAG.getContext(), ExcessBits));
+ EVT::getIntegerVT(*DAG.getContext(),
+ ExcessBits));
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index bd86694446d6a..d56029208e61a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -75,7 +75,7 @@ private:
/// getTypeAction - Return how we should legalize values of this type.
LegalizeAction getTypeAction(EVT VT) const {
- switch (ValueTypeActions.getTypeAction(*DAG.getContext(), VT)) {
+ switch (ValueTypeActions.getTypeAction(VT)) {
default:
assert(false && "Unknown legalize action!");
case TargetLowering::Legal:
@@ -86,8 +86,7 @@ private:
// 2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
if (!VT.isVector())
return PromoteInteger;
- else
- return WidenVector;
+ return WidenVector;
case TargetLowering::Expand:
// Expand can mean
// 1) split scalar in half, 2) convert a float to an integer,
@@ -95,23 +94,21 @@ private:
if (!VT.isVector()) {
if (VT.isInteger())
return ExpandInteger;
- else if (VT.getSizeInBits() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
+ if (VT.getSizeInBits() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
return SoftenFloat;
- else
- return ExpandFloat;
- } else if (VT.getVectorNumElements() == 1) {
- return ScalarizeVector;
- } else {
- return SplitVector;
+ return ExpandFloat;
}
+
+ if (VT.getVectorNumElements() == 1)
+ return ScalarizeVector;
+ return SplitVector;
}
}
/// isTypeLegal - Return true if this type is legal on this target.
bool isTypeLegal(EVT VT) const {
- return (ValueTypeActions.getTypeAction(*DAG.getContext(), VT) ==
- TargetLowering::Legal);
+ return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal;
}
/// IgnoreNodeResults - Pretend all of this node's results are legal.
@@ -584,6 +581,7 @@ private:
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+ SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
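The restructured getTypeAction above is a pure decision tree over the target's declared action and the shape of the type; the early returns replace the old nested else branches. The same tree as a standalone function, with booleans standing in for the EVT queries (the enums and parameters are modeled locally, not the LLVM definitions):

    enum TargetAction { Legal, Promote, Expand };
    enum LegalizeAction {
      KeepLegal, PromoteInteger, WidenVector,
      ExpandInteger, SoftenFloat, ExpandFloat,
      ScalarizeVector, SplitVector
    };

    // Mirrors DAGTypeLegalizer::getTypeAction after the cleanup.
    static LegalizeAction classify(TargetAction TA, bool IsVector,
                                   bool IsInteger, unsigned NumElts,
                                   bool TransformKeepsSize) {
      switch (TA) {
      case Legal:
        return KeepLegal;
      case Promote:
        // Promote scalars to a larger integer type; promote vectors by
        // widening to more elements (e.g. v3i32 -> v4i32).
        if (!IsVector)
          return PromoteInteger;
        return WidenVector;
      case Expand:
        if (!IsVector) {
          if (IsInteger)
            return ExpandInteger;      // Split the scalar in half.
          if (TransformKeepsSize)
            return SoftenFloat;        // Same-size integer: soften the float.
          return ExpandFloat;          // Otherwise expand to a larger float.
        }
        if (NumElts == 1)
          return ScalarizeVector;      // Unwrap the single element.
        return SplitVector;            // Split the vector in two.
      }
      return KeepLegal;                // Unreachable with valid input.
    }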
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 93aeff5c1e6cf..93bc2d04928e7 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -983,6 +983,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+ case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -1091,8 +1092,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
return SDValue(DAG.UpdateNodeOperands(N, Hi,
DAG.getConstant(IdxVal - LoElts,
- Idx.getValueType())),
- 0);
+ Idx.getValueType())), 0);
}
// Store the vector to the stack.
@@ -1113,7 +1113,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
assert(N->isUnindexed() && "Indexed store of vector?");
assert(OpNo == 1 && "Can only split the stored value");
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
bool isTruncating = N->isTruncatingStore();
SDValue Ch = N->getChain();
@@ -1132,25 +1132,49 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
- Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
LoMemVT, isVol, isNT, Alignment);
else
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
isVol, isNT, Alignment);
// Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
SVOffset += IncrementSize;
if (isTruncating)
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
HiMemVT, isVol, isNT, Alignment);
else
- Hi = DAG.getStore(Ch, dl, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
isVol, isNT, Alignment);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+ DebugLoc DL = N->getDebugLoc();
+
+ // The input operands must all have the same type, and we know the result
+ // type is valid. Convert this to a buildvector which extracts all the
+ // input elements.
+ // TODO: If the input elements are power-of-two vectors, we could convert this to
+ // a new CONCAT_VECTORS node with elements that are half-wide.
+ SmallVector<SDValue, 32> Elts;
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ SDValue Op = N->getOperand(op);
+ for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+ i != e; ++i) {
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+ Op, DAG.getIntPtrConstant(i)));
+
+ }
+ }
+
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+ &Elts[0], Elts.size());
}
@@ -1274,8 +1298,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
- NumElts = NumElts / 2;
- VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
@@ -1283,124 +1307,123 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
- } else if (NumElts == 1) {
- // No legal vector version so unroll the vector operation and then widen.
+ }
+
+ // No legal vector version so unroll the vector operation and then widen.
+ if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
- } else {
- // Since the operation can trap, apply operation on the original vector.
- EVT MaxVT = VT;
- SDValue InOp1 = GetWidenedVector(N->getOperand(0));
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
-
- SmallVector<SDValue, 16> ConcatOps(CurNumElts);
- unsigned ConcatEnd = 0; // Current ConcatOps index.
- int Idx = 0; // Current Idx into input vectors.
-
- // NumElts := greatest synthesizable vector size (at most WidenVT)
- // while (orig. vector has unhandled elements) {
- // take munches of size NumElts from the beginning and add to ConcatOps
- // NumElts := next smaller supported vector size or 1
- // }
- while (CurNumElts != 0) {
- while (CurNumElts >= NumElts) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
- DAG.getIntPtrConstant(Idx));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
- DAG.getIntPtrConstant(Idx));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
- Idx += NumElts;
- CurNumElts -= NumElts;
- }
- do {
- NumElts = NumElts / 2;
- VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
-
- if (NumElts == 1) {
- for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp1, DAG.getIntPtrConstant(Idx));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
- InOp2, DAG.getIntPtrConstant(Idx));
- ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
- EOp1, EOp2);
- }
- CurNumElts = 0;
+
+ // Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+ SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+ unsigned ConcatEnd = 0; // Current ConcatOps index.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest synthesizable vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
+ while (CurNumElts != 0) {
+ while (CurNumElts >= NumElts) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+ DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+ DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+ Idx += NumElts;
+ CurNumElts -= NumElts;
+ }
+ do {
+ NumElts = NumElts / 2;
+ VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+ } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
+
+ if (NumElts == 1) {
+ for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp1, DAG.getIntPtrConstant(Idx));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ InOp2, DAG.getIntPtrConstant(Idx));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
}
+ CurNumElts = 0;
}
+ }
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
- // while (Some element of ConcatOps is not of type MaxVT) {
- // From the end of ConcatOps, collect elements of the same type and put
- // them into an op of the next larger supported type
- // }
- while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
- Idx = ConcatEnd - 1;
- VT = ConcatOps[Idx--].getValueType();
- while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
- Idx--;
-
- int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
- EVT NextVT;
- do {
- NextSize *= 2;
- NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeSynthesizable(NextVT));
-
- if (!VT.isVector()) {
- // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
- SDValue VecOp = DAG.getUNDEF(NextVT);
- unsigned NumToInsert = ConcatEnd - Idx - 1;
- for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
- ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
- }
- ConcatOps[Idx+1] = VecOp;
- ConcatEnd = Idx + 2;
- }
- else {
- // Vector type, create a CONCAT_VECTORS of type NextVT
- SDValue undefVec = DAG.getUNDEF(VT);
- unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
- SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
- unsigned RealVals = ConcatEnd - Idx - 1;
- unsigned SubConcatEnd = 0;
- unsigned SubConcatIdx = Idx + 1;
- while (SubConcatEnd < RealVals)
- SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
- while (SubConcatEnd < OpsToConcat)
- SubConcatOps[SubConcatEnd++] = undefVec;
- ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
- NextVT, &SubConcatOps[0],
- OpsToConcat);
- ConcatEnd = SubConcatIdx + 1;
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
+ VT = ConcatOps[Idx--].getValueType();
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeSynthesizable(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
}
+ ConcatOps[Idx+1] = VecOp;
+ ConcatEnd = Idx + 2;
+ } else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
}
+ }
- // Check to see if we have a single operation with the widen type.
- if (ConcatEnd == 1) {
- VT = ConcatOps[0].getValueType();
- if (VT == WidenVT)
- return ConcatOps[0];
- }
-
- // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
- unsigned NumOps =
- WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
- if (NumOps != ConcatEnd ) {
- SDValue UndefVal = DAG.getUNDEF(MaxVT);
- for (unsigned j = ConcatEnd; j < NumOps; ++j)
- ConcatOps[j] = UndefVal;
- }
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
}
+
+ // Add undefs of size MaxVT until ConcatOps grows to the length of WidenVT.
+ unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+ if (NumOps != ConcatEnd ) {
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
+ for (unsigned j = ConcatEnd; j < NumOps; ++j)
+ ConcatOps[j] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
@@ -1561,8 +1584,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
unsigned NewNumElts = WidenSize / InSize;
if (InVT.isVector()) {
EVT InEltVT = InVT.getVectorElementType();
- NewInVT= EVT::getVectorVT(*DAG.getContext(), InEltVT,
- WidenSize / InEltVT.getSizeInBits());
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+ WidenSize / InEltVT.getSizeInBits());
} else {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
@@ -1686,8 +1709,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SDValue RndOp = N->getOperand(3);
SDValue SatOp = N->getOperand(4);
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),
- N->getValueType(0));
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
EVT InVT = InOp.getValueType();
@@ -1720,9 +1742,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SmallVector<SDValue, 16> Ops(NumConcat);
Ops[0] = InOp;
SDValue UndefVal = DAG.getUNDEF(InVT);
- for (unsigned i = 1; i != NumConcat; ++i) {
+ for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- }
+
InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
SatOp, CvtCode);
@@ -2225,25 +2247,24 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Check if we can load the element with one instruction
if (LdWidth <= NewVTWidth) {
- if (NewVT.isVector()) {
- if (NewVT != WidenVT) {
- assert(WidenWidth % NewVTWidth == 0);
- unsigned NumConcat = WidenWidth / NewVTWidth;
- SmallVector<SDValue, 16> ConcatOps(NumConcat);
- SDValue UndefVal = DAG.getUNDEF(NewVT);
- ConcatOps[0] = LdOp;
- for (unsigned i = 1; i != NumConcat; ++i)
- ConcatOps[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
- NumConcat);
- } else
- return LdOp;
- } else {
+ if (!NewVT.isVector()) {
unsigned NumElts = WidenWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
}
+ if (NewVT == WidenVT)
+ return LdOp;
+
+ assert(WidenWidth % NewVTWidth == 0);
+ unsigned NumConcat = WidenWidth / NewVTWidth;
+ SmallVector<SDValue, 16> ConcatOps(NumConcat);
+ SDValue UndefVal = DAG.getUNDEF(NewVT);
+ ConcatOps[0] = LdOp;
+ for (unsigned i = 1; i != NumConcat; ++i)
+ ConcatOps[i] = UndefVal;
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
+ NumConcat);
}
// Load vector by using multiple loads from largest vector to scalar
@@ -2276,52 +2297,55 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Build the vector from the loads operations
unsigned End = LdOps.size();
- if (LdOps[0].getValueType().isVector()) {
- // If the load contains vectors, build the vector using concat vector.
- // All of the vectors used to loads are power of 2 and the scalars load
- // can be combined to make a power of 2 vector.
- SmallVector<SDValue, 16> ConcatOps(End);
- int i = End - 1;
- int Idx = End;
- EVT LdTy = LdOps[i].getValueType();
- // First combine the scalar loads to a vector
- if (!LdTy.isVector()) {
- for (--i; i >= 0; --i) {
- LdTy = LdOps[i].getValueType();
- if (LdTy.isVector())
- break;
- }
- ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
- }
- ConcatOps[--Idx] = LdOps[i];
+ if (!LdOps[0].getValueType().isVector())
+ // All the loads are scalar loads.
+ return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+
+ // If the loads produced vectors, build the result using CONCAT_VECTORS.
+ // All of the vectors used by the loads are powers of 2, and the scalar
+ // loads can be combined to make a power-of-2 vector.
+ SmallVector<SDValue, 16> ConcatOps(End);
+ int i = End - 1;
+ int Idx = End;
+ EVT LdTy = LdOps[i].getValueType();
+ // First combine the scalar loads to a vector
+ if (!LdTy.isVector()) {
for (--i; i >= 0; --i) {
- EVT NewLdTy = LdOps[i].getValueType();
- if (NewLdTy != LdTy) {
- // Create a larger vector
- ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
- &ConcatOps[Idx], End - Idx);
- Idx = End - 1;
- LdTy = NewLdTy;
- }
- ConcatOps[--Idx] = LdOps[i];
+ LdTy = LdOps[i].getValueType();
+ if (LdTy.isVector())
+ break;
}
+ ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End);
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ for (--i; i >= 0; --i) {
+ EVT NewLdTy = LdOps[i].getValueType();
+ if (NewLdTy != LdTy) {
+ // Create a larger vector
+ ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
+ &ConcatOps[Idx], End - Idx);
+ Idx = End - 1;
+ LdTy = NewLdTy;
+ }
+ ConcatOps[--Idx] = LdOps[i];
+ }
- if (WidenWidth != LdTy.getSizeInBits()*(End - Idx)) {
- // We need to fill the rest with undefs to build the vector
- unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
- SmallVector<SDValue, 16> WidenOps(NumOps);
- SDValue UndefVal = DAG.getUNDEF(LdTy);
- unsigned i = 0;
- for (; i != End-Idx; ++i)
- WidenOps[i] = ConcatOps[Idx+i];
- for (; i != NumOps; ++i)
- WidenOps[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
- } else
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- &ConcatOps[Idx], End - Idx);
- } else // All the loads are scalar loads.
- return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
+ if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ &ConcatOps[Idx], End - Idx);
+
+ // We need to fill the rest with undefs to build the vector
+ unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ SDValue UndefVal = DAG.getUNDEF(LdTy);
+ {
+ unsigned i = 0;
+ for (; i != End-Idx; ++i)
+ WidenOps[i] = ConcatOps[Idx+i];
+ for (; i != NumOps; ++i)
+ WidenOps[i] = UndefVal;
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
}
SDValue
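The new SplitVecOp_CONCAT_VECTORS lowers a concat whose operands would otherwise need splitting into a BUILD_VECTOR of per-element extracts: the outer loop walks the input vectors, the inner loop extracts each element in order. The same flattening over plain containers, where EXTRACT_VECTOR_ELT becomes an element read and BUILD_VECTOR a push_back (a sketch, not the DAG API):

    #include <vector>

    // Concatenate input "vectors" by extracting every element in order and
    // rebuilding one result vector, mirroring the DAG-level rewrite
    //   CONCAT_VECTORS(a, b, ...) -> BUILD_VECTOR(a[0], a[1], ..., b[0], ...).
    static std::vector<float>
    concatByElements(const std::vector<std::vector<float> > &Ops) {
      std::vector<float> Elts;
      for (size_t op = 0; op != Ops.size(); ++op)        // each operand
        for (size_t i = 0; i != Ops[op].size(); ++i)     // each element
          Elts.push_back(Ops[op][i]);                    // "extract"
      return Elts;                                       // "build_vector"
    }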
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 3b86c3286585f..fae27294e3646 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -13,6 +13,7 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -432,6 +433,30 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
return N->getValueType(NumRes);
}
+/// CheckForLiveRegDef - Return true and update live register vector if the
+/// specified register def of the specified SUnit clobbers any "live" registers.
+static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+ std::vector<SUnit*> &LiveRegDefs,
+ SmallSet<unsigned, 4> &RegAdded,
+ SmallVector<unsigned, 4> &LRegs,
+ const TargetRegisterInfo *TRI) {
+ bool Added = false;
+ if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
+ if (RegAdded.insert(Reg)) {
+ LRegs.push_back(Reg);
+ Added = true;
+ }
+ }
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
+ if (RegAdded.insert(*Alias)) {
+ LRegs.push_back(*Alias);
+ Added = true;
+ }
+ }
+ return Added;
+}
+
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
@@ -446,37 +471,44 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
- unsigned Reg = I->getReg();
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != I->getSUnit()) {
- if (RegAdded.insert(Reg))
- LRegs.push_back(Reg);
- }
- for (const unsigned *Alias = TRI->getAliasSet(Reg);
- *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != I->getSUnit()) {
- if (RegAdded.insert(*Alias))
- LRegs.push_back(*Alias);
- }
+ CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
+ RegAdded, LRegs, TRI);
}
}
for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ if (Node->getOpcode() == ISD::INLINEASM) {
+ // Inline asm can clobber physical defs.
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
+ --NumOps; // Ignore the flag operand.
+
+ for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+ unsigned Flags =
+ cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
+ unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+
+ ++i; // Skip the ID value.
+ if (InlineAsm::isRegDefKind(Flags) ||
+ InlineAsm::isRegDefEarlyClobberKind(Flags)) {
+ // Check for def of register or earlyclobber register.
+ for (; NumVals; --NumVals, ++i) {
+ unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
+ }
+ } else
+ i += NumVals;
+ }
+ continue;
+ }
if (!Node->isMachineOpcode())
continue;
const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
if (!TID.ImplicitDefs)
continue;
for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
- if (LiveRegDefs[*Reg] && LiveRegDefs[*Reg] != SU) {
- if (RegAdded.insert(*Reg))
- LRegs.push_back(*Reg);
- }
- for (const unsigned *Alias = TRI->getAliasSet(*Reg);
- *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias))
- LRegs.push_back(*Alias);
- }
+ CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
return !LRegs.empty();
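CheckForLiveRegDef above factors the previously duplicated "does this def clobber a live register or one of its aliases" walk into a single helper that the new inline-asm path reuses. A simplified model of the check, with standard containers replacing SUnit, SmallSet, and the target's alias iterator (all types here are stand-ins):

    #include <set>
    #include <vector>

    typedef int Unit;  // Stands in for llvm::SUnit.

    // If Reg, or any register aliasing it, is currently live-defined by a
    // different scheduling unit, record it (once) in LRegs and report the
    // interference.
    static bool checkForLiveRegDef(const Unit *SU, unsigned Reg,
                                   const std::vector<const Unit*> &LiveRegDefs,
                                   std::set<unsigned> &RegAdded,
                                   std::vector<unsigned> &LRegs,
                                   const std::vector<std::vector<unsigned> > &Aliases) {
      bool Added = false;
      if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
        if (RegAdded.insert(Reg).second) {   // First time we flag this reg.
          LRegs.push_back(Reg);
          Added = true;
        }
      }
      for (size_t a = 0; a != Aliases[Reg].size(); ++a) {
        unsigned Alias = Aliases[Reg][a];
        if (LiveRegDefs[Alias] && LiveRegDefs[Alias] != SU) {
          if (RegAdded.insert(Alias).second) {
            LRegs.push_back(Alias);
            Added = true;
          }
        }
      }
      return Added;
    }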
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 3ef521c398e11..4c3e4e3b07680 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,6 +24,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -54,10 +55,16 @@ static RegisterScheduler
static RegisterScheduler
hybridListDAGScheduler("list-hybrid",
- "Bottom-up rr list scheduling which avoid stalls for "
- "long latency instructions",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance latency and register pressure",
createHybridListDAGScheduler);
+static RegisterScheduler
+ ILPListDAGScheduler("list-ilp",
+ "Bottom-up register pressure aware list scheduling "
+ "which tries to balance ILP and register pressure",
+ createILPListDAGScheduler);
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -181,7 +188,9 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
- DEBUG(dbgs() << "********** List Scheduling **********\n");
+ DEBUG(dbgs()
+ << "********** List Scheduling BB#" << BB->getNumber()
+ << " **********\n");
NumLiveRegs = 0;
LiveRegDefs.resize(TRI->getNumRegs(), NULL);
@@ -273,6 +282,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU);
+ AvailableQueue->ScheduledNode(SU);
+
ReleasePredecessors(SU, CurCycle);
// Release all the implicit physical register defs that are live.
@@ -291,7 +302,6 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
}
SU->isScheduled = true;
- AvailableQueue->ScheduledNode(SU);
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
@@ -315,8 +325,6 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
DEBUG(SU->dump(this));
- AvailableQueue->UnscheduledNode(SU);
-
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
@@ -346,6 +354,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
SU->isScheduled = false;
SU->isAvailable = true;
AvailableQueue->push(SU);
+ AvailableQueue->UnscheduledNode(SU);
}
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
@@ -956,7 +965,8 @@ namespace {
template<class SF>
class RegReductionPriorityQueue;
- /// Sorting functions for the Available queue.
+ // bu_ls_rr_sort - Priority function for bottom up register pressure
+ // reduction scheduler.
struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
@@ -965,6 +975,8 @@ namespace {
bool operator()(const SUnit* left, const SUnit* right) const;
};
+ // td_ls_rr_sort - Priority function for top down register pressure reduction
+ // scheduler.
struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
@@ -973,6 +985,7 @@ namespace {
bool operator()(const SUnit* left, const SUnit* right) const;
};
+ // src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
@@ -983,13 +996,26 @@ namespace {
bool operator()(const SUnit* left, const SUnit* right) const;
};
+ // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
: SPQ(spq) {}
hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
: SPQ(RHS.SPQ) {}
-
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
+
+ // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+ // scheduler.
+ struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
+ ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
bool operator()(const SUnit* left, const SUnit* right) const;
};
} // end anonymous namespace
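Each *_ls_rr_sort struct above is a strict-weak-ordering functor that RegReductionPriorityQueue instantiates to pick the next SUnit; the new ilp_ls_rr_sort adds a variant that balances instruction-level parallelism against register pressure. The general shape of such a comparator with std::priority_queue, using a made-up Priority field in place of the schedulers' heuristics:

    #include <queue>
    #include <vector>

    struct Node { unsigned Priority; };

    // Comparator in the style of bu_ls_rr_sort: operator() returns true when
    // 'L' should come out of the queue *after* 'R'.
    struct ByPriority {
      bool operator()(const Node *L, const Node *R) const {
        return L->Priority < R->Priority;   // Highest priority pops first.
      }
    };

    int main() {
      std::priority_queue<Node*, std::vector<Node*>, ByPriority> Q;
      Node A = {3}, B = {7};
      Q.push(&A);
      Q.push(&B);
      // Q.top() == &B: the scheduler would pick the higher-priority unit.
    }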
@@ -1029,23 +1055,48 @@ namespace {
std::vector<SUnit*> Queue;
SF Picker;
unsigned CurQueueId;
+ bool TracksRegPressure;
protected:
// SUnits - The SUnits for the current graph.
std::vector<SUnit> *SUnits;
-
+
+ MachineFunction &MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
ScheduleDAGRRList *scheduleDAG;
// SethiUllmanNumbers - The SethiUllman number for each node.
std::vector<unsigned> SethiUllmanNumbers;
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
public:
- RegReductionPriorityQueue(const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri)
- : Picker(this), CurQueueId(0),
- TII(tii), TRI(tri), scheduleDAG(NULL) {}
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
+ }
+ }
void initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
@@ -1072,6 +1123,7 @@ namespace {
void releaseState() {
SUnits = 0;
SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
}
unsigned getNodePriority(const SUnit *SU) const {
@@ -1139,10 +1191,244 @@ namespace {
SU->NodeQueueId = 0;
}
+ bool HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
+
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+ EVT VT = PN->getOperand(0).getValueType();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ // Check if this increases register pressure of the specific register
+ // class to the point where it would cause spills.
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ continue;
+ } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ // Check if this increases register pressure of the specific register
+ // class to the point where it would cause spills.
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true; // Reg pressure already high.
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ // Check if this increases register pressure of the specific register
+ // class to the point where it would cause spills.
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ void ScheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+ EVT VT = PN->getOperand(0).getValueType();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+ }
+
+ void UnscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
+
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ continue;
+ }
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+ EVT VT = PN->getOperand(0).getValueType();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Flag || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
+
+ dumpRegPressure();
+ }
+
void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
scheduleDAG = scheduleDag;
}
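+ // dumpRegPressure - Print the current pressure of each register class
+ // that has any, one line per class, e.g. "GR32: 5 / 8" for current
+ // pressure 5 against a limit of 8 (values illustrative).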
+ void dumpRegPressure() const {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+ }
+
protected:
bool canClobber(const SUnit *SU, const SUnit *Op);
void AddPseudoTwoAddrDeps();
@@ -1161,6 +1447,9 @@ namespace {
typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
HybridBURRPriorityQueue;
+
+ typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ ILPBURRPriorityQueue;
}
/// closestSucc - Returns the scheduled cycle of the successor which is
@@ -1260,30 +1549,63 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
}
bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
- bool LStall = left->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < left->getHeight();
- bool RStall = right->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < right->getHeight();
- // If scheduling one of the node will cause a pipeline stall, delay it.
- // If scheduling either one of the node will cause a pipeline stall, sort them
- // according to their height.
- // If neither will cause a pipeline stall, try to reduce register pressure.
- if (LStall) {
- if (!RStall)
- return true;
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- } else if (RStall)
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh)
+ return true;
+ else if (!LHigh && RHigh)
+ return false;
+ else if (!LHigh && !RHigh) {
+ // Low register pressure situation, schedule for latency if possible.
+ bool LStall = left->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < left->getHeight();
+ bool RStall = right->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < right->getHeight();
+ // If scheduling one of the nodes will cause a pipeline stall, delay it.
+ // If scheduling both nodes will cause pipeline stalls, sort them
+ // according to their height.
+ // If neither will cause a pipeline stall, try to reduce register pressure.
+ if (LStall) {
+ if (!RStall)
+ return true;
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ } else if (RStall)
return false;
- // If either node is scheduling for latency, sort them by height and latency
- // first.
- if (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency) {
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- if (left->Latency != right->Latency)
- return left->Latency > right->Latency;
+ // If either node is scheduling for latency, sort them by height and latency
+ // first.
+ if (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency) {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency;
+ }
+ }
+
+ return BURRSort(left, right, SPQ);
+}
+
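+// ilp_ls_rr_sort - Like hybrid_ls_rr_sort, schedule to reduce register
+// pressure when it is high; otherwise prefer the node with more
+// predecessors to expose instruction level parallelism, then fall back to
+// the common BURRSort tie breakers.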
+bool ilp_ls_rr_sort::operator()(const SUnit *left,
+ const SUnit *right) const {
+ bool LHigh = SPQ->HighRegPressure(left);
+ bool RHigh = SPQ->HighRegPressure(right);
+ // Avoid causing spills. If register pressure is high, schedule for
+ // register pressure reduction.
+ if (LHigh && !RHigh)
+ return true;
+ else if (!LHigh && RHigh)
+ return false;
+ else if (!LHigh && !RHigh) {
+ // Low register pressure situation, schedule to maximize instruction level
+ // parallelism.
+ if (left->NumPreds > right->NumPreds)
+ return false;
+ else if (left->NumPreds < right->NumPreds)
+ return true;
}
return BURRSort(left, right, SPQ);
@@ -1635,8 +1957,8 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
-
+ BURegReductionPriorityQueue *PQ =
+ new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
@@ -1648,8 +1970,8 @@ llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
-
+ TDRegReductionPriorityQueue *PQ =
+ new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
@@ -1661,8 +1983,8 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
- SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
-
+ SrcRegReductionPriorityQueue *PQ =
+ new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
@@ -1673,9 +1995,24 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
- HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
+ HybridBURRPriorityQueue *PQ =
+ new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
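+
+/// createILPListDAGScheduler - This creates a bottom-up register pressure
+/// aware list scheduler that tries to increase instruction level
+/// parallelism when register pressure is low.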
+llvm::ScheduleDAGSDNodes *
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ const TargetLowering *TLI = &IS->getTargetLowering();
+
+ ILPBURRPriorityQueue *PQ =
+ new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
PQ->setScheduleDAG(SD);
return SD;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 06cf053087550..f1bf82ab145a4 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -59,8 +59,9 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
SUnits.back().OrigNode = &SUnits.back();
SUnit *SU = &SUnits.back();
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
- if (N->isMachineOpcode() &&
- N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)
+ if (!N ||
+ (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
SU->SchedulingPref = Sched::None;
else
SU->SchedulingPref = TLI.getSchedulingPreference(N);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e83a0346b5351..ad06ebda5b007 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2236,7 +2236,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
- if (FiniteOnlyFPMath())
+ if (NoNaNsFPMath)
return true;
// If the value is a constant, we can obviously see if it is a NaN or not.
@@ -2281,35 +2281,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
}
-/// getShuffleScalarElt - Returns the scalar element that will make up the ith
-/// element of the result of the vector shuffle.
-SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
- unsigned i) {
- EVT VT = N->getValueType(0);
- if (N->getMaskElt(i) < 0)
- return getUNDEF(VT.getVectorElementType());
- unsigned Index = N->getMaskElt(i);
- unsigned NumElems = VT.getVectorNumElements();
- SDValue V = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
- Index %= NumElems;
-
- if (V.getOpcode() == ISD::BIT_CONVERT) {
- V = V.getOperand(0);
- EVT VVT = V.getValueType();
- if (!VVT.isVector() || VVT.getVectorNumElements() != (unsigned)NumElems)
- return SDValue();
- }
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return (Index == 0) ? V.getOperand(0)
- : getUNDEF(VT.getVectorElementType());
- if (V.getOpcode() == ISD::BUILD_VECTOR)
- return V.getOperand(Index);
- if (const ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(V))
- return getShuffleScalarElt(SVN, Index);
- return SDValue();
-}
-
-
/// getNode - Gets or creates the specified node.
///
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
@@ -2624,7 +2595,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// one big BUILD_VECTOR.
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
}
@@ -3021,7 +2993,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR &&
N3.getOpcode() == ISD::BUILD_VECTOR) {
- SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
+ SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+ N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
@@ -5872,6 +5845,7 @@ std::string ISD::ArgFlagsTy::getArgFlagsString() {
void SDNode::dump() const { dump(0); }
void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
+ dbgs() << '\n';
}
void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
@@ -5895,7 +5869,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
e = MN->memoperands_end(); i != e; ++i) {
OS << **i;
- if (next(i) != e)
+ if (llvm::next(i) != e)
OS << " ";
}
OS << ">";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 458e865a6b3c8..e65744592c8bb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -70,22 +70,29 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT);
+
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT, then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts,
unsigned NumParts, EVT PartVT, EVT ValueVT,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
+ if (ValueVT.isVector())
+ return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
+
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
if (NumParts > 1) {
// Assemble the value from multiple parts.
- if (!ValueVT.isVector() && ValueVT.isInteger()) {
+ if (ValueVT.isInteger()) {
unsigned PartBits = PartVT.getSizeInBits();
unsigned ValueBits = ValueVT.getSizeInBits();
@@ -100,25 +107,25 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
- Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2,
+ Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
PartVT, HalfVT);
- Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2,
+ Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT);
} else {
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]);
}
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
if (RoundParts < NumParts) {
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
- Hi = getCopyFromParts(DAG, dl,
+ Hi = getCopyFromParts(DAG, DL,
Parts + RoundParts, OddParts, PartVT, OddVT);
// Combine the round and odd parts.
@@ -126,68 +133,29 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
if (TLI.isBigEndian())
std::swap(Lo, Hi);
EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
- Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
+ Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
+ Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
DAG.getConstant(Lo.getValueType().getSizeInBits(),
TLI.getPointerTy()));
- Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
- Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
- }
- } else if (ValueVT.isVector()) {
- // Handle a multi-element vector.
- EVT IntermediateVT, RegisterVT;
- unsigned NumIntermediates;
- unsigned NumRegs =
- TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
- NumIntermediates, RegisterVT);
- assert(NumRegs == NumParts
- && "Part count doesn't match vector breakdown!");
- NumParts = NumRegs; // Silence a compiler warning.
- assert(RegisterVT == PartVT
- && "Part type doesn't match vector breakdown!");
- assert(RegisterVT == Parts[0].getValueType() &&
- "Part type doesn't match part!");
-
- // Assemble the parts into intermediate operands.
- SmallVector<SDValue, 8> Ops(NumIntermediates);
- if (NumIntermediates == NumParts) {
- // If the register was not expanded, truncate or copy the value,
- // as appropriate.
- for (unsigned i = 0; i != NumParts; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
- PartVT, IntermediateVT);
- } else if (NumParts > 0) {
- // If the intermediate type was expanded, build the intermediate
- // operands from the parts.
- assert(NumParts % NumIntermediates == 0 &&
- "Must expand into a divisible number of parts!");
- unsigned Factor = NumParts / NumIntermediates;
- for (unsigned i = 0; i != NumIntermediates; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
- PartVT, IntermediateVT);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
+ Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
}
-
- // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
- // intermediate operands.
- Val = DAG.getNode(IntermediateVT.isVector() ?
- ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
- ValueVT, &Ops[0], NumIntermediates);
} else if (PartVT.isFloatingPoint()) {
// FP split into multiple FP parts (for ppcf128)
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
"Unexpected split");
SDValue Lo, Hi;
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
+ Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
+ Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
} else {
// FP split into integer parts (soft fp)
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
+ Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
}
}
@@ -197,219 +165,315 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
if (PartVT == ValueVT)
return Val;
- if (PartVT.isVector()) {
- assert(ValueVT.isVector() && "Unknown vector conversion!");
- return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
- }
-
- if (ValueVT.isVector()) {
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
- "Only trivial scalar-to-vector conversions should get here!");
- return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
- }
-
- if (PartVT.isInteger() &&
- ValueVT.isInteger()) {
+ if (PartVT.isInteger() && ValueVT.isInteger()) {
if (ValueVT.bitsLT(PartVT)) {
// For a truncate, see if we have any information to
// indicate whether the truncated bits will always be
// zero or sign-extension.
if (AssertOp != ISD::DELETED_NODE)
- Val = DAG.getNode(AssertOp, dl, PartVT, Val,
+ Val = DAG.getNode(AssertOp, DL, PartVT, Val,
DAG.getValueType(ValueVT));
- return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
- } else {
- return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
}
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
}
if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
- if (ValueVT.bitsLT(Val.getValueType())) {
- // FP_ROUND's are always exact here.
- return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
+ // FP_ROUND's are always exact here.
+ if (ValueVT.bitsLT(Val.getValueType()))
+ return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
DAG.getIntPtrConstant(1));
- }
- return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
+ return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
- return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
+ return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
return SDValue();
}
+/// getCopyFromPartsVector - Create a value that contains the specified legal
+/// parts combined into the value they represent. The parts are required to
+/// make up a vector value (ValueVT must be a vector type).
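+/// For example, a <2 x float> value carried in a single legal <4 x float>
+/// part is recovered with EXTRACT_SUBVECTOR at index 0 via the widening
+/// case below.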
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ const SDValue *Parts, unsigned NumParts,
+ EVT PartVT, EVT ValueVT) {
+ assert(ValueVT.isVector() && "Not a vector value");
+ assert(NumParts > 0 && "No parts to assemble!");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Val = Parts[0];
+
+ // Handle a multi-element vector.
+ if (NumParts > 1) {
+ EVT IntermediateVT, RegisterVT;
+ unsigned NumIntermediates;
+ unsigned NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
+ NumParts = NumRegs; // Silence a compiler warning.
+ assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ assert(RegisterVT == Parts[0].getValueType() &&
+ "Part type doesn't match part!");
+
+ // Assemble the parts into intermediate operands.
+ SmallVector<SDValue, 8> Ops(NumIntermediates);
+ if (NumIntermediates == NumParts) {
+ // If the register was not expanded, truncate or copy the value,
+ // as appropriate.
+ for (unsigned i = 0; i != NumParts; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
+ PartVT, IntermediateVT);
+ } else if (NumParts > 0) {
+ // If the intermediate type was expanded, build the intermediate
+ // operands from the parts.
+ assert(NumParts % NumIntermediates == 0 &&
+ "Must expand into a divisible number of parts!");
+ unsigned Factor = NumParts / NumIntermediates;
+ for (unsigned i = 0; i != NumIntermediates; ++i)
+ Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
+ PartVT, IntermediateVT);
+ }
+
+ // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
+ // intermediate operands.
+ Val = DAG.getNode(IntermediateVT.isVector() ?
+ ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
+ ValueVT, &Ops[0], NumIntermediates);
+ }
+
+ // There is now one part, held in Val. Correct it to match ValueVT.
+ PartVT = Val.getValueType();
+
+ if (PartVT == ValueVT)
+ return Val;
+
+ if (PartVT.isVector()) {
+ // If the element type of the source/dest vectors are the same, but the
+ // parts vector has more elements than the value vector, then we have a
+ // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
+ // elements we want.
+ if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
+ assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
+ "Cannot narrow, it would be a lossy transformation");
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(0));
+ }
+
+ // Vector/Vector bitcast.
+ return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+ }
+
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial scalar-to-vector conversions should get here!");
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
+}
+
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT);
+
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
EVT PartVT,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT PtrVT = TLI.getPointerTy();
EVT ValueVT = Val.getValueType();
+
+ // Handle the vector case separately.
+ if (ValueVT.isVector())
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
- if (!NumParts)
+ if (NumParts == 0)
return;
- if (!ValueVT.isVector()) {
- if (PartVT == ValueVT) {
- assert(NumParts == 1 && "No-op copy with multiple parts!");
- Parts[0] = Val;
- return;
- }
-
- if (NumParts * PartBits > ValueVT.getSizeInBits()) {
- // If the parts cover more bits than the value has, promote the value.
- if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
- assert(NumParts == 1 && "Do not know what to promote to!");
- Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
- } else if (PartVT.isInteger() && ValueVT.isInteger()) {
- ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
- } else {
- llvm_unreachable("Unknown mismatch!");
- }
- } else if (PartBits == ValueVT.getSizeInBits()) {
- // Different types of the same size.
- assert(NumParts == 1 && PartVT != ValueVT);
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
- } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
- // If the parts cover less bits than value has, truncate the value.
- if (PartVT.isInteger() && ValueVT.isInteger()) {
- ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
- } else {
- llvm_unreachable("Unknown mismatch!");
- }
- }
-
- // The value may have changed - recompute ValueVT.
- ValueVT = Val.getValueType();
- assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
- "Failed to tile the value with PartVT!");
-
- if (NumParts == 1) {
- assert(PartVT == ValueVT && "Type conversion failed!");
- Parts[0] = Val;
- return;
- }
+ assert(!ValueVT.isVector() && "Vector case handled elsewhere");
+ if (PartVT == ValueVT) {
+ assert(NumParts == 1 && "No-op copy with multiple parts!");
+ Parts[0] = Val;
+ return;
+ }
- // Expand the value into multiple parts.
- if (NumParts & (NumParts - 1)) {
- // The number of parts is not a power of 2. Split off and copy the tail.
+ if (NumParts * PartBits > ValueVT.getSizeInBits()) {
+ // If the parts cover more bits than the value has, promote the value.
+ if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
+ assert(NumParts == 1 && "Do not know what to promote to!");
+ Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
+ } else {
assert(PartVT.isInteger() && ValueVT.isInteger() &&
- "Do not know what to expand to!");
- unsigned RoundParts = 1 << Log2_32(NumParts);
- unsigned RoundBits = RoundParts * PartBits;
- unsigned OddParts = NumParts - RoundParts;
- SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
- DAG.getConstant(RoundBits,
- TLI.getPointerTy()));
- getCopyToParts(DAG, dl, OddVal, Parts + RoundParts,
- OddParts, PartVT);
-
- if (TLI.isBigEndian())
- // The odd parts were reversed by getCopyToParts - unreverse them.
- std::reverse(Parts + RoundParts, Parts + NumParts);
-
- NumParts = RoundParts;
+ "Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
- Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
+ Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
}
+ } else if (PartBits == ValueVT.getSizeInBits()) {
+ // Different types of the same size.
+ assert(NumParts == 1 && PartVT != ValueVT);
+ Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
+ // If the parts cover fewer bits than the value has, truncate the value.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Unknown mismatch!");
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The value may have changed - recompute ValueVT.
+ ValueVT = Val.getValueType();
+ assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
+ "Failed to tile the value with PartVT!");
- // The number of parts is a power of 2. Repeatedly bisect the value using
- // EXTRACT_ELEMENT.
- Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
- EVT::getIntegerVT(*DAG.getContext(),
- ValueVT.getSizeInBits()),
- Val);
-
- for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
- for (unsigned i = 0; i < NumParts; i += StepSize) {
- unsigned ThisBits = StepSize * PartBits / 2;
- EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
- SDValue &Part0 = Parts[i];
- SDValue &Part1 = Parts[i+StepSize/2];
-
- Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- ThisVT, Part0,
- DAG.getConstant(1, PtrVT));
- Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
- ThisVT, Part0,
- DAG.getConstant(0, PtrVT));
-
- if (ThisBits == PartBits && ThisVT != PartVT) {
- Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
- PartVT, Part0);
- Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
- PartVT, Part1);
- }
+ if (NumParts == 1) {
+ assert(PartVT == ValueVT && "Type conversion failed!");
+ Parts[0] = Val;
+ return;
+ }
+
+ // Expand the value into multiple parts.
+ if (NumParts & (NumParts - 1)) {
+ // The number of parts is not a power of 2. Split off and copy the tail.
+ assert(PartVT.isInteger() && ValueVT.isInteger() &&
+ "Do not know what to expand to!");
+ unsigned RoundParts = 1 << Log2_32(NumParts);
+ unsigned RoundBits = RoundParts * PartBits;
+ unsigned OddParts = NumParts - RoundParts;
+ SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
+ DAG.getIntPtrConstant(RoundBits));
+ getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
+
+ if (TLI.isBigEndian())
+ // The odd parts were reversed by getCopyToParts - unreverse them.
+ std::reverse(Parts + RoundParts, Parts + NumParts);
+
+ NumParts = RoundParts;
+ ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
+ Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ // The number of parts is a power of 2. Repeatedly bisect the value using
+ // EXTRACT_ELEMENT.
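+ // For example, an i128 value split into four i32 parts is first placed
+ // whole in Parts[0], halved into two i64s, then quartered into four
+ // i32s; big-endian targets reverse the parts afterwards.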
+ Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL,
+ EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits()),
+ Val);
+
+ for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
+ for (unsigned i = 0; i < NumParts; i += StepSize) {
+ unsigned ThisBits = StepSize * PartBits / 2;
+ EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
+ SDValue &Part0 = Parts[i];
+ SDValue &Part1 = Parts[i+StepSize/2];
+
+ Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(1));
+ Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
+ ThisVT, Part0, DAG.getIntPtrConstant(0));
+
+ if (ThisBits == PartBits && ThisVT != PartVT) {
+ Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1);
}
}
+ }
- if (TLI.isBigEndian())
- std::reverse(Parts, Parts + OrigNumParts);
+ if (TLI.isBigEndian())
+ std::reverse(Parts, Parts + OrigNumParts);
+}
- return;
- }
- // Vector ValueVT.
+/// getCopyToPartsVector - Create a series of nodes that contain the specified
+/// value split into legal parts.
+static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
+ SDValue Val, SDValue *Parts, unsigned NumParts,
+ EVT PartVT) {
+ EVT ValueVT = Val.getValueType();
+ assert(ValueVT.isVector() && "Not a vector");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
if (NumParts == 1) {
- if (PartVT != ValueVT) {
- if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
- Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
- } else {
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
- "Only trivial vector-to-scalar conversions should get here!");
- Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- PartVT, Val,
- DAG.getConstant(0, PtrVT));
- }
- }
+ if (PartVT == ValueVT) {
+ // Nothing to do.
+ } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
+ // Bitconvert vector->vector case.
+ Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ } else if (PartVT.isVector() &&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
+ PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
+ EVT ElementVT = PartVT.getVectorElementType();
+ // Vector widening case, e.g. <2 x float> -> <4 x float>. Pad the
+ // trailing elements with undef via BUILD_VECTOR.
+ SmallVector<SDValue, 16> Ops;
+ for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ ElementVT, Val, DAG.getIntPtrConstant(i)));
+
+ for (unsigned i = ValueVT.getVectorNumElements(),
+ e = PartVT.getVectorNumElements(); i != e; ++i)
+ Ops.push_back(DAG.getUNDEF(ElementVT));
+
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+ // FIXME: Use CONCAT for 2x -> 4x.
+
+ //SDValue UndefElts = DAG.getUNDEF(VectorTy);
+ //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
+ } else {
+ // Vector -> scalar conversion.
+ assert(ValueVT.getVectorElementType() == PartVT &&
+ ValueVT.getVectorNumElements() == 1 &&
+ "Only trivial vector-to-scalar conversions should get here!");
+ Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ PartVT, Val, DAG.getIntPtrConstant(0));
+ }
+
Parts[0] = Val;
return;
}
-
+
// Handle a multi-element vector.
EVT IntermediateVT, RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
- IntermediateVT, NumIntermediates, RegisterVT);
+ IntermediateVT,
+ NumIntermediates, RegisterVT);
unsigned NumElements = ValueVT.getVectorNumElements();
-
+
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
-
+
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
if (IntermediateVT.isVector())
- Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+ Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
IntermediateVT, Val,
- DAG.getConstant(i * (NumElements / NumIntermediates),
- PtrVT));
+ DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
else
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- IntermediateVT, Val,
- DAG.getConstant(i, PtrVT));
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ IntermediateVT, Val, DAG.getIntPtrConstant(i));
}
-
+
// Split the intermediate operands into legal parts.
if (NumParts == NumIntermediates) {
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split each value into
// legal parts.
@@ -417,10 +481,13 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
}
}
+
namespace {
/// RegsForValue - This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information
@@ -460,11 +527,6 @@ namespace {
EVT regvt, EVT valuevt)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
- RegsForValue(const SmallVector<unsigned, 4> &regs,
- const SmallVector<EVT, 4> &regvts,
- const SmallVector<EVT, 4> &valuevts)
- : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
-
RegsForValue(LLVMContext &Context, const TargetLowering &tli,
unsigned Reg, const Type *Ty) {
ComputeValueVTs(tli, Ty, ValueVTs);
@@ -530,6 +592,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
DebugLoc dl,
SDValue &Chain, SDValue *Flag) const {
+ // A Value with type {} or [0 x %t] needs no registers.
+ if (ValueVTs.empty())
+ return SDValue();
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Assemble the legal parts into the final values.
@@ -623,8 +689,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
EVT RegisterVT = RegVTs[Value];
- getCopyToParts(DAG, dl,
- Val.getValue(Val.getResNo() + Value),
+ getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
&Parts[Part], NumParts, RegisterVT);
Part += NumParts;
}
@@ -701,6 +766,7 @@ void SelectionDAGBuilder::clear() {
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
+ DanglingDebugInfoMap.clear();
CurDebugLoc = DebugLoc();
HasTailCall = false;
}
@@ -805,6 +871,33 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+// generate the debug data structures now that we've seen its definition.
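+// (These entries are deferred by visitIntrinsicCall when a dbg_value's
+// operand, e.g. a PHI node, has not been lowered yet; getValue() calls
+// back in here once the SDValue exists.)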
+void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
+ SDValue Val) {
+ DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
+ if (DDI.getDI()) {
+ const DbgValueInst *DI = DDI.getDI();
+ DebugLoc dl = DDI.getdl();
+ unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
+ MDNode *Variable = DI->getVariable();
+ uint64_t Offset = DI->getOffset();
+ SDDbgValue *SDV;
+ if (Val.getNode()) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
+ SDV = DAG.getDbgValue(Variable, Val.getNode(),
+ Val.getResNo(), Offset, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, Val.getNode(), false);
+ }
+ } else {
+ SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
+ Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ }
+ DanglingDebugInfoMap[V] = DanglingDebugInfo();
+ }
+}
+
// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
// If we already have an SDValue for this value, use it. It's important
@@ -826,6 +919,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
return Val;
}
@@ -839,10 +933,11 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// Otherwise create a new SDValue and remember it.
SDValue Val = getValueImpl(V);
NodeMap[V] = Val;
+ resolveDanglingDebugInfo(V, Val);
return Val;
}
-/// getValueImpl - Helper function for getValue and getMaterializedValue.
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const Constant *C = dyn_cast<Constant>(V)) {
@@ -986,10 +1081,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
- EVT PtrVT = PtrValueVTs[0];
for (unsigned i = 0; i != NumValues; ++i) {
- SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
- DAG.getConstant(Offsets[i], PtrVT));
+ SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+ RetPtr.getValueType(), RetPtr,
+ DAG.getIntPtrConstant(Offsets[i]));
Chains[i] =
DAG.getStore(Chain, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
@@ -2709,11 +2804,6 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
Ty = StTy->getElementType(Field);
- } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) {
- unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
-
- // Offset canonically 0 for unions, but type changes
- Ty = UnTy->getElementType(Field);
} else {
Ty = cast<SequentialType>(Ty)->getElementType();
@@ -2818,7 +2908,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// Inform the Frame Information that we have just allocated a variable-sized
// object.
- FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
+ FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
@@ -3824,11 +3914,11 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
/// argument, create the corresponding DBG_VALUE machine instruction for it now.
/// At the end of instruction selection, they will be inserted to the entry BB.
bool
-SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
- const Value *V, MDNode *Variable,
- uint64_t Offset,
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset,
const SDValue &N) {
- if (!isa<Argument>(V))
+ const Argument *Arg = dyn_cast<Argument>(V);
+ if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -3842,7 +3932,15 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
return false;
unsigned Reg = 0;
- if (N.getOpcode() == ISD::CopyFromReg) {
+ if (Arg->hasByValAttr()) {
+ // Byval arguments' frame index is recorded during argument lowering.
+ // Use this info directly.
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ }
+
+ if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
@@ -3966,42 +4064,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
- if (!DIVariable(DI.getVariable()).Verify())
- return 0;
-
MDNode *Variable = DI.getVariable();
- // Parameters are handled specially.
- bool isParameter =
- DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
const Value *Address = DI.getAddress();
- if (!Address)
+ if (!Address || !DIVariable(DI.getVariable()).Verify())
return 0;
- if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
- Address = BCI->getOperand(0);
- const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
- if (AI) {
- // Don't handle byval arguments or VLAs, for example.
- // Non-byval arguments are handled here (they refer to the stack temporary
- // alloca at this point).
- DenseMap<const AllocaInst*, int>::iterator SI =
- FuncInfo.StaticAllocaMap.find(AI);
- if (SI == FuncInfo.StaticAllocaMap.end())
- return 0; // VLAs.
- int FI = SI->second;
-
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
- MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
- }
// Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder
// but do not always have a corresponding SDNode built. The absolute,
// but not the relative, SDNodeOrder values differ depending on whether
// debug info exists.
++SDNodeOrder;
+
+ // Check if address has undef value.
+ if (isa<UndefValue>(Address) ||
+ (Address->use_empty() && !isa<Argument>(Address))) {
+ SDDbgValue*SDV =
+ DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+
SDValue &N = NodeMap[Address];
+ if (!N.getNode() && isa<Argument>(Address))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[Address];
SDDbgValue *SDV;
if (N.getNode()) {
+ // Parameters are handled specially.
+ bool isParameter =
+ DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
+ if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+ Address = BCI->getOperand(0);
+ const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
if (isParameter && !AI) {
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (FINode)
@@ -4020,10 +4116,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
- // This isn't useful, but it shows what we're missing.
- SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, isParameter);
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from FuncInfo.ValueMap. Otherwise add an undef
+ // dbg value to help track missing debug info.
+ if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+ SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ }
}
return 0;
}
@@ -4048,31 +4148,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, 0, false);
} else {
- bool createUndef = false;
- // FIXME : Why not use getValue() directly ?
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
SDValue N = NodeMap[V];
if (!N.getNode() && isa<Argument>(V))
// Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
- if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
SDV = DAG.getDbgValue(Variable, N.getNode(),
N.getResNo(), Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
- } else if (isa<PHINode>(V) && !V->use_empty()) {
- SDValue N = getValue(V);
- if (N.getNode()) {
- if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
- SDV = DAG.getDbgValue(Variable, N.getNode(),
- N.getResNo(), Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, N.getNode(), false);
- }
- } else
- createUndef = true;
- } else
- createUndef = true;
- if (createUndef) {
+ } else if (isa<PHINode>(V) && !V->use_empty()) {
+ // Do not call getValue(V) yet, as we don't want to generate code.
+ // Remember it for later.
+ DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+ DanglingDebugInfoMap[V] = DDI;
+ } else {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter; we need this fallback.
SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
@@ -4572,6 +4665,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
!isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
isTailCall = false;
+ // If there's a possibility that fast-isel has already selected some amount
+ // of the current basic block, don't emit a tail call.
+ if (isTailCall && EnableFastISel)
+ isTailCall = false;
+
std::pair<SDValue,SDValue> Result =
TLI.LowerCallTo(getRoot(), RetTy,
CS.paramHasAttr(0, Attribute::SExt),
@@ -6054,6 +6152,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
i += NumParts;
}
+ // Note down frame index for byval arguments.
+ if (I->hasByValAttr() && !ArgValues.empty())
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+ FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
+
if (!I->use_empty()) {
SDValue Res;
if (!ArgValues.empty())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 46733d6db1241..5f400e9c83acb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -18,9 +18,6 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SmallSet.h"
-#endif
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/CallSite.h"
@@ -64,6 +61,7 @@ class PHINode;
class PtrToIntInst;
class ReturnInst;
class SDISelAsmOperandInfo;
+class SDDbgValue;
class SExtInst;
class SelectInst;
class ShuffleVectorInst;
@@ -93,6 +91,24 @@ class SelectionDAGBuilder {
/// to preserve debug information for incoming arguments.
DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+ /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ class DanglingDebugInfo {
+ const DbgValueInst* DI;
+ DebugLoc dl;
+ unsigned SDNodeOrder;
+ public:
+ DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+ DI(di), dl(DL), SDNodeOrder(SDNO) { }
+ const DbgValueInst* getDI() { return DI; }
+ DebugLoc getdl() { return dl; }
+ unsigned getSDNodeOrder() { return SDNodeOrder; }
+ };
+
+ /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+ /// yet seen the referent. We defer handling these until we do see it.
+ DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
public:
/// PendingLoads - Loads are not emitted to the program immediately. We bunch
/// them up and then emit token factor nodes when possible. This allows us to
@@ -345,6 +361,9 @@ public:
void visit(unsigned Opcode, const User &I);
+ // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+ // generate the debug data structures now that we've seen its definition.
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val);
SDValue getValue(const Value *V);
SDValue getNonRegisterValue(const Value *V);
SDValue getValueImpl(const Value *V);
@@ -506,13 +525,11 @@ private:
void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
- /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a
- /// function argument, create the corresponding DBG_VALUE machine instruction
- /// for it now. At the end of instruction selection, they will be inserted to
- /// the entry BB.
- bool EmitFuncArgumentDbgValue(const DbgValueInst &DI,
- const Value *V, MDNode *Variable,
- uint64_t Offset, const SDValue &N);
+ /// EmitFuncArgumentDbgValue - If V is a function argument then create the
+ /// corresponding DBG_VALUE machine instruction for it now. At the end of
+ /// instruction selection, they will be inserted into the entry BB.
+ bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+ int64_t Offset, const SDValue &N);
};
} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 08ba5482f7d22..66cb5ceb09e53 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -132,14 +132,16 @@ namespace llvm {
const TargetLowering &TLI = IS->getTargetLowering();
if (OptLevel == CodeGenOpt::None)
- return createFastDAGScheduler(IS, OptLevel);
+ return createSourceListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::Latency)
return createTDListDAGScheduler(IS, OptLevel);
if (TLI.getSchedulingPreference() == Sched::RegPressure)
return createBURRListDAGScheduler(IS, OptLevel);
- assert(TLI.getSchedulingPreference() == Sched::Hybrid &&
+ if (TLI.getSchedulingPreference() == Sched::Hybrid)
+ return createHybridListDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() == Sched::ILP &&
"Unknown sched type!");
- return createHybridListDAGScheduler(IS, OptLevel);
+ return createILPListDAGScheduler(IS, OptLevel);
}
}
@@ -169,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
//===----------------------------------------------------------------------===//
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
- MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
+ MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
CurDAG(new SelectionDAG(tm)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
@@ -216,7 +218,7 @@ static bool FunctionCallsSetJmp(const Function *F) {
for (Value::const_use_iterator
I = Callee->use_begin(), E = Callee->use_end();
I != E; ++I)
- if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (const CallInst *CI = dyn_cast<CallInst>(*I))
if (CI->getParent()->getParent() == F)
return true;
}
@@ -362,38 +364,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
CodeGenAndEmitDAG();
}
-namespace {
-/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
-/// nodes from the worklist.
-class SDOPsWorkListRemover : public SelectionDAG::DAGUpdateListener {
- SmallVector<SDNode*, 128> &Worklist;
- SmallPtrSet<SDNode*, 128> &InWorklist;
-public:
- SDOPsWorkListRemover(SmallVector<SDNode*, 128> &wl,
- SmallPtrSet<SDNode*, 128> &inwl)
- : Worklist(wl), InWorklist(inwl) {}
-
- void RemoveFromWorklist(SDNode *N) {
- if (!InWorklist.erase(N)) return;
-
- SmallVector<SDNode*, 128>::iterator I =
- std::find(Worklist.begin(), Worklist.end(), N);
- assert(I != Worklist.end() && "Not in worklist");
-
- *I = Worklist.back();
- Worklist.pop_back();
- }
-
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
- RemoveFromWorklist(N);
- }
-
- virtual void NodeUpdated(SDNode *N) {
- // Ignore updates.
- }
-};
-}
-
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 6cae804422ce6..8313de5e32bba 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -199,7 +199,7 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
#else
errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
<< " on systems with Graphviz or gv!\n";
- return std::string("");
+ return std::string();
#endif
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 4f3866956cac5..b74f600cfa2db 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -651,6 +651,53 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
return NumVectorRegs;
}
+/// isLegalRC - Return true if at least one of the value types that can be
+/// represented by the specified register class is legal.
+bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const {
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (isTypeLegal(*I))
+ return true;
+ }
+ return false;
+}
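+// (isLegalRC is used by hasLegalSuperRegRegClasses and
+// findRepresentativeClass below to prune candidate register classes.)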
+
+/// hasLegalSuperRegRegClasses - Return true if the specified register class
+/// has one or more super-reg register classes that are legal.
+bool
+TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{
+ if (*RC->superregclasses_begin() == 0)
+ return false;
+ for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
+ E = RC->superregclasses_end(); I != E; ++I) {
+ const TargetRegisterClass *RRC = *I;
+ if (isLegalRC(RRC))
+ return true;
+ }
+ return false;
+}
+
+/// findRepresentativeClass - Return the largest legal super-reg register class
+/// of the register class for the specified type and its associated "cost".
+std::pair<const TargetRegisterClass*, uint8_t>
+TargetLowering::findRepresentativeClass(EVT VT) const {
+ const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+ if (!RC)
+ return std::make_pair(RC, 0);
+ const TargetRegisterClass *BestRC = RC;
+ for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(),
+ E = RC->superregclasses_end(); I != E; ++I) {
+ const TargetRegisterClass *RRC = *I;
+ if (RRC->isASubClass() || !isLegalRC(RRC))
+ continue;
+ if (!hasLegalSuperRegRegClasses(RRC))
+ return std::make_pair(RRC, 1);
+ BestRC = RRC;
+ }
+ return std::make_pair(BestRC, 1);
+}
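+// For example (illustrative, target-dependent): starting from the i8
+// register class on x86-64, the walk over legal super-register classes
+// ends at GR64, which is returned with cost 1.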
+
+
/// computeRegisterProperties - Once all of the register classes are added,
/// this allows us to compute derived properties we expose.
void TargetLowering::computeRegisterProperties() {
@@ -736,6 +783,28 @@ void TargetLowering::computeRegisterProperties() {
MVT VT = (MVT::SimpleValueType)i;
if (isTypeLegal(VT)) continue;
+ // Determine if there is a legal wider type. If so, we should promote to
+ // that wider vector type.
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ if (NElts != 1) {
+ bool IsLegalWiderType = false;
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts &&
+ isTypeSynthesizable(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType) continue;
+ }
+
MVT IntermediateVT;
EVT RegisterVT;
unsigned NumIntermediates;
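The promotion search added above simply walks the value-type table upward, looking for a wider legal vector with the same element type. A minimal standalone model of that search, with a made-up isLegal predicate and a plain struct in place of MVT (not the LLVM API, and it ignores the isTypeSynthesizable check):

#include <cstdio>
#include <vector>

// Simplified stand-in for MVT: element bit width and lane count.
struct VT { int EltBits; int NElts; };

// Hypothetical legality predicate; a real target would consult its tables.
static bool isLegal(const VT &V) {
  return V.EltBits == 32 && V.NElts == 4;   // pretend only 4 x i32 is legal
}

// Return the first strictly wider legal vector with the same element type,
// or {0,0} if none exists -- mirroring the promotion loop above.
static VT findWiderLegalVT(const VT &V, const std::vector<VT> &Table) {
  for (const VT &Cand : Table)
    if (Cand.EltBits == V.EltBits && Cand.NElts > V.NElts && isLegal(Cand))
      return Cand;
  return VT{0, 0};
}

int main() {
  std::vector<VT> Table = {{32, 1}, {32, 2}, {32, 4}, {32, 8}};
  VT Wider = findWiderLegalVT(VT{32, 2}, Table);   // v2i32 -> v4i32
  std::printf("promote to %d x i%d\n", Wider.NElts, Wider.EltBits);
}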
@@ -744,32 +813,29 @@ void TargetLowering::computeRegisterProperties() {
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
- // Determine if there is a legal wider type.
- bool IsLegalWiderType = false;
- EVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
- if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts && NElts != 1) {
- TransformToType[i] = SVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- IsLegalWiderType = true;
- break;
- }
- }
- if (!IsLegalWiderType) {
- EVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- ValueTypeActions.setTypeAction(VT, Expand);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- }
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ ValueTypeActions.setTypeAction(VT, Expand);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
}
}
+
+  // Determine the 'representative' register class for each value type.
+  // A representative register class is the largest legal register class for a
+  // group of value types that is not a sub-register class of any other class.
+  // For example, on i386 the representative class for i8, i16, and i32 would
+  // be GR32; on x86_64 it would be GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) {
+ const TargetRegisterClass* RRC;
+ uint8_t Cost;
+ tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i);
+ RepRegClassForVT[i] = RRC;
+ RepRegClassCostForVT[i] = Cost;
+ }
}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
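As a rough model of the new findRepresentativeClass: walk the super-register classes, keeping the largest one that is still legal. The sketch below uses hypothetical register-class records rather than TargetRegisterInfo, so it only illustrates the shape of the walk:

#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// Hypothetical register class record: name, legality, and super classes
// listed from smallest to largest.
struct RegClass {
  std::string Name;
  bool Legal;
  std::vector<const RegClass*> Supers;
};

// Pick the largest legal super class reachable from RC, with a fixed cost of
// 1, mirroring the loop in findRepresentativeClass above (the subclass and
// legal-super-class refinements are omitted).
std::pair<const RegClass*, uint8_t>
findRepresentative(const RegClass *RC) {
  const RegClass *Best = RC;
  for (const RegClass *Super : RC->Supers)
    if (Super->Legal)
      Best = Super;
  return {Best, 1};
}

int main() {
  RegClass GR32{"GR32", true, {}};
  RegClass GR16{"GR16", true, {&GR32}};
  return findRepresentative(&GR16).first->Name == "GR32" ? 0 : 1;
}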
@@ -798,8 +864,21 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT &IntermediateVT,
unsigned &NumIntermediates,
EVT &RegisterVT) const {
- // Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
+
+ // If there is a wider vector type with the same element type as this one,
+ // we should widen to that legal vector type. This handles things like
+ // <2 x float> -> <4 x float>.
+ if (NumElts != 1 && getTypeAction(VT) == Promote) {
+ RegisterVT = getTypeToTransformTo(Context, VT);
+ if (isTypeLegal(RegisterVT)) {
+ IntermediateVT = RegisterVT;
+ NumIntermediates = 1;
+ return 1;
+ }
+ }
+
+ // Figure out the right, legal destination reg to copy into.
EVT EltTy = VT.getVectorElementType();
unsigned NumVectorRegs = 1;
@@ -828,16 +907,12 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
- if (DestVT.bitsLT(NewVT)) {
- // Value is expanded, e.g. i64 -> i16.
+ if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
- } else {
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
- }
- return 1;
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
}
/// Get the EVTs and ArgFlags collections that represent the legalized return
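When the early-out above does not fire, the function falls through to the counting logic rewritten in the next hunk. That arithmetic reduces to a small calculation; a sketch with plain bit widths instead of EVTs:

#include <cassert>
#include <cstdio>

// NumVectorRegs pieces of NewVTBits each, stored in DestVTBits-wide
// registers. If the destination register is narrower, each piece is expanded
// further, e.g. i64 pieces in i16 registers cost four registers apiece.
unsigned regsForBreakdown(unsigned NumVectorRegs,
                          unsigned NewVTBits, unsigned DestVTBits) {
  assert(DestVTBits != 0 && "destination type must have a width");
  if (DestVTBits < NewVTBits)                  // value is expanded
    return NumVectorRegs * (NewVTBits / DestVTBits);
  return NumVectorRegs;                        // promoted or already legal
}

int main() {
  std::printf("%u\n", regsForBreakdown(2, 64, 16)); // prints 8
  std::printf("%u\n", regsForBreakdown(4, 32, 32)); // prints 4
}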
@@ -1308,9 +1383,32 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
}
- if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt),
+ if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
+
+ // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
+ // are not demanded. This will likely allow the anyext to be folded away.
+ if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
+ SDValue InnerOp = InOp.getNode()->getOperand(0);
+ EVT InnerVT = InnerOp.getValueType();
+ if ((APInt::getHighBitsSet(BitWidth,
+ BitWidth - InnerVT.getSizeInBits()) &
+ DemandedMask) == 0 &&
+ isTypeDesirableForOp(ISD::SHL, InnerVT)) {
+ EVT ShTy = getShiftAmountTy();
+ if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
+ ShTy = InnerVT;
+ SDValue NarrowShl =
+ TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getConstant(ShAmt, ShTy));
+ return
+ TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(),
+ NarrowShl));
+ }
+ }
+
KnownZero <<= SA->getZExtValue();
KnownOne <<= SA->getZExtValue();
// low bits known zero.
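The new shl-of-anyext combine fires only when no demanded bit lies in the extended part. With plain 64-bit masks in place of APInt, the predicate looks roughly like this:

#include <cstdint>
#include <cstdio>

// True when the fold is safe: none of the demanded bits may come from the
// (undefined) extension above the narrow type. Assumes InnerBits < 64 so the
// shift below is well defined.
bool canNarrowShl(unsigned BitWidth, unsigned InnerBits,
                  uint64_t DemandedMask) {
  // Mask of the high BitWidth-InnerBits bits, like APInt::getHighBitsSet.
  uint64_t HighBits = ~uint64_t(0) << InnerBits;
  if (BitWidth < 64)
    HighBits &= (uint64_t(1) << BitWidth) - 1;
  return (HighBits & DemandedMask) == 0;
}

int main() {
  // Only the low 16 bits are demanded of a 64-bit shl of an anyext from i32:
  std::printf("%d\n", canNarrowShl(64, 32, 0xFFFF));        // 1: fold is safe
  std::printf("%d\n", canNarrowShl(64, 32, ~uint64_t(0)));  // 0: high bits used
}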
@@ -1415,11 +1513,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
- BitWidth - EVT.getScalarType().getSizeInBits()) &
- NewMask;
+ BitWidth - EVT.getScalarType().getSizeInBits());
// If none of the extended bits are demanded, eliminate the sextinreg.
- if (NewBits == 0)
+ if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
@@ -1886,12 +1983,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT ExtDstTy = N0.getValueType();
unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
- // If the extended part has any inconsistent bits, it cannot ever
- // compare equal. In other words, they have to be all ones or all
- // zeros.
- APInt ExtBits =
- APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
- if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
+ // If the constant doesn't fit into the number of bits for the source of
+ // the sign extension, it is impossible for both sides to be equal.
+ if (C1.getMinSignedBits() > ExtSrcTyBits)
return DAG.getConstant(Cond == ISD::SETNE, VT);
SDValue ZextOp;
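The rewritten SETCC check asks whether the constant is representable at all in the sign-extended source width. A standalone version of that test, using a hand-rolled minSignedBits over int64_t instead of APInt::getMinSignedBits:

#include <cstdint>
#include <cstdio>

// Number of bits needed to hold V as a signed value, like
// APInt::getMinSignedBits (0 and -1 need 1 bit, 127 needs 8). Relies on
// arithmetic right shift for negative values, which mainstream compilers
// provide.
unsigned minSignedBits(int64_t V) {
  unsigned Bits = 1;
  while (V != 0 && V != -1) {
    V >>= 1;
    ++Bits;
  }
  return Bits;
}

int main() {
  unsigned ExtSrcTyBits = 8;
  // 127 fits an i8 sign extension; 300 cannot, so x == 300 is always false.
  std::printf("%d\n", minSignedBits(127) > ExtSrcTyBits); // 0: comparable
  std::printf("%d\n", minSignedBits(300) > ExtSrcTyBits); // 1: never equal
}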
@@ -2476,7 +2570,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
int64_t Offs = GA->getOffset();
if (C) Offs += C->getZExtValue();
Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
- C->getDebugLoc(),
+ C ? C->getDebugLoc() : DebugLoc(),
Op.getValueType(), Offs));
return;
}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index e69d3e4fa78aa..b29ea19835bc9 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -59,13 +59,16 @@ DisableCrossClassJoin("disable-cross-class-join",
cl::desc("Avoid coalescing cross register class copies"),
cl::init(false), cl::Hidden);
-static RegisterPass<SimpleRegisterCoalescing>
-X("simple-register-coalescing", "Simple Register Coalescing");
+static cl::opt<bool>
+DisablePhysicalJoin("disable-physical-join",
+ cl::desc("Avoid coalescing physical register copies"),
+ cl::init(false), cl::Hidden);
-// Declare that we implement the RegisterCoalescer interface
-static RegisterAnalysisGroup<RegisterCoalescer, true/*The Default*/> V(X);
+INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer,
+ "simple-register-coalescing", "Simple Register Coalescing",
+ false, false, true);
-const PassInfo *const llvm::SimpleRegisterCoalescingID = &X;
+char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID;
void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -386,16 +389,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
return false;
- bool BHasSubRegs = false;
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- BHasSubRegs = *tri_->getSubRegisters(IntB.reg);
-
- // Abort if the subregisters of IntB.reg have values that are not simply the
+ // Abort if the aliases of IntB.reg have values that are not simply the
// clobbers from the superreg.
- if (BHasSubRegs)
- for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
- if (li_->hasInterval(*SR) &&
- HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0))
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+ if (li_->hasInterval(*AS) &&
+ HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
return false;
  // If some of the uses of IntA.reg are already coalesced away, return false.
@@ -412,6 +411,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
}
+ DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI);
+
// At this point we have decided that it is legal to do this
// transformation. Start by commuting the instruction.
MachineBasicBlock *MBB = DefMI->getParent();
@@ -470,16 +471,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (Extended)
UseMO.setIsKill(false);
}
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (UseMI->isCopy()) {
- if (UseMI->getOperand(0).getReg() != IntB.reg ||
- UseMI->getOperand(0).getSubReg())
- continue;
- } else if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
- if (DstReg != IntB.reg || DstSubIdx)
- continue;
- } else
+ if (!UseMI->isCopy())
continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
// This copy will become a noop. If it's defining a new val#,
// remove that val# as well. However this live range is being
// extended to the end of the existing live range defined by the copy.
@@ -504,13 +501,13 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// Remove val#'s defined by copies that will be coalesced away.
for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
VNInfo *DeadVNI = BDeadValNos[i];
- if (BHasSubRegs) {
- for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) {
+ if (!li_->hasInterval(*AS))
continue;
- LiveInterval &SRLI = li_->getInterval(*SR);
- if (const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def))
- SRLI.removeValNo(SRLR->valno);
+ LiveInterval &ASLI = li_->getInterval(*AS);
+ if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def))
+ ASLI.removeValNo(ASLR->valno);
}
}
IntB.removeValNo(BDeadValNos[i]);
@@ -628,14 +625,6 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
if (DefMO.getReg() == li.reg && !DefMO.getSubReg())
DefMO.setIsDead();
}
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- DstReg == li.reg && DstSubIdx == 0) {
- // Last use is itself an identity code.
- int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg,
- false, false, tri_);
- LastUseMI->getOperand(DeadIdx).setIsDead();
- }
return true;
}
@@ -772,16 +761,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
// A PhysReg copy that won't be coalesced can perhaps be rematerialized
// instead.
if (DstIsPhys) {
- unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
- if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
- CopySrcSubIdx, CopyDstSubIdx) &&
- CopySrcSubIdx == 0 && CopyDstSubIdx == 0 &&
- CopySrcReg != CopyDstReg && CopySrcReg == SrcReg &&
- CopyDstReg != DstReg && !JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, 0,
- UseMI))
- continue;
-
if (UseMI->isCopy() &&
!UseMI->getOperand(1).getSubReg() &&
!UseMI->getOperand(0).getSubReg() &&
@@ -834,28 +813,6 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
dbgs() << li_->getInstructionIndex(UseMI) << "\t";
dbgs() << *UseMI;
});
-
-
- // After updating the operand, check if the machine instruction has
- // become a copy. If so, update its val# information.
- const TargetInstrDesc &TID = UseMI->getDesc();
- if (DstIsPhys || TID.getNumDefs() != 1 || TID.getNumOperands() <= 2)
- continue;
-
- unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
- if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
- CopySrcSubIdx, CopyDstSubIdx) &&
- CopySrcReg != CopyDstReg &&
- (TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
- allocatableRegs_[CopyDstReg])) {
- LiveInterval &LI = li_->getInterval(CopyDstReg);
- SlotIndex DefIdx =
- li_->getInstructionIndex(UseMI).getDefIndex();
- if (const LiveRange *DLR = LI.getLiveRangeContaining(DefIdx)) {
- if (DLR->valno->def == DefIdx)
- DLR->valno->setCopy(UseMI);
- }
- }
}
}
@@ -1082,13 +1039,18 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false; // Not coalescable.
}
+ if (DisablePhysicalJoin && CP.isPhys()) {
+ DEBUG(dbgs() << "\tPhysical joins disabled.\n");
+ return false;
+ }
+
DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg());
// Enforce policies.
if (CP.isPhys()) {
DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n");
// Only coalesce to allocatable physreg.
- if (!allocatableRegs_[CP.getDstReg()]) {
+ if (!li_->isAllocatable(CP.getDstReg())) {
DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
return false; // Not coalescable.
}
@@ -1137,7 +1099,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// happens.
if (li_->hasInterval(CP.getDstReg()) &&
li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
- mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg());
++numAborts;
DEBUG(dbgs()
<< "\tPhysical register live interval too complicated, abort!\n");
@@ -1156,7 +1117,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI))
return true;
- mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg());
++numAborts;
DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
Again = true; // May be possible to coalesce later.
@@ -1543,21 +1503,19 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
MachineInstr *Inst = MII++;
    // If this isn't a copy or an extract_subreg, we can't join intervals.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- bool isInsUndef = false;
+ unsigned SrcReg, DstReg;
if (Inst->isCopy()) {
DstReg = Inst->getOperand(0).getReg();
SrcReg = Inst->getOperand(1).getReg();
} else if (Inst->isSubregToReg()) {
DstReg = Inst->getOperand(0).getReg();
SrcReg = Inst->getOperand(2).getReg();
- } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ } else
continue;
bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (isInsUndef ||
- (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()))
+ if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
ImpDefCopies.push_back(CopyRec(Inst, 0));
else if (SrcIsPhys || DstIsPhys)
PhysCopies.push_back(CopyRec(Inst, 0));
@@ -1679,11 +1637,6 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
MachineInstr *UseMI = Use.getParent();
if (UseMI->isIdentityCopy())
continue;
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- SrcReg == DstReg && SrcSubIdx == DstSubIdx)
- // Ignore identity copies.
- continue;
SlotIndex Idx = li_->getInstructionIndex(UseMI);
// FIXME: Should this be Idx != UseIdx? SlotIndex() will return something
// that compares higher than any other interval.
@@ -1708,10 +1661,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
return NULL;
// Ignore identity copies.
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (!MI->isIdentityCopy() &&
- !(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- SrcReg == DstReg && SrcSubIdx == DstSubIdx))
+ if (!MI->isIdentityCopy())
for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
MachineOperand &Use = MI->getOperand(i);
if (Use.isReg() && Use.isUse() && Use.getReg() &&
@@ -1747,7 +1697,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
- allocatableRegs_ = tri_->getAllocatableSet(fn);
for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
E = tri_->regclass_end(); I != E; ++I)
allocatableRCRegs_.insert(std::make_pair(*I,
@@ -1775,30 +1724,35 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
mii != mie; ) {
MachineInstr *MI = mii;
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (JoinedCopies.count(MI)) {
// Delete all coalesced copies.
bool DoDelete = true;
- if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- assert(MI->isCopyLike() && "Unrecognized copy instruction");
- SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
- // Do not delete extract_subreg, insert_subreg of physical
- // registers unless the definition is dead. e.g.
- // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
- // or else the scavenger may complain. LowerSubregs will
- // delete them later.
- DoDelete = false;
- }
+ assert(MI->isCopyLike() && "Unrecognized copy instruction");
+ unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ MI->getNumOperands() > 2)
+ // Do not delete extract_subreg, insert_subreg of physical
+ // registers unless the definition is dead. e.g.
+ // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+ // or else the scavenger may complain. LowerSubregs will
+ // delete them later.
+ DoDelete = false;
+
if (MI->allDefsAreDead()) {
LiveInterval &li = li_->getInterval(SrcReg);
if (!ShortenDeadCopySrcLiveRange(li, MI))
ShortenDeadCopyLiveRange(li, MI);
DoDelete = true;
}
- if (!DoDelete)
+ if (!DoDelete) {
+ // We need the instruction to adjust liveness, so make it a KILL.
+ if (MI->isSubregToReg()) {
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(1);
+ }
+ MI->setDesc(tii_->get(TargetOpcode::KILL));
mii = llvm::next(mii);
- else {
+ } else {
li_->RemoveMachineInstrFromMaps(MI);
mii = mbbi->erase(mii);
++numPeep;
@@ -1840,9 +1794,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
}
// If the move will be an identity move delete it
- bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
- if (MI->isIdentityCopy() ||
- (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) {
+ if (MI->isIdentityCopy()) {
+ unsigned SrcReg = MI->getOperand(1).getReg();
if (li_->hasInterval(SrcReg)) {
LiveInterval &RegInt = li_->getInterval(SrcReg);
// If def of this move instruction is dead, remove its live range
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index e154da60affa2..855bdb98b36c6 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -47,7 +47,6 @@ namespace llvm {
const MachineLoopInfo* loopInfo;
AliasAnalysis *AA;
- BitVector allocatableRegs_;
DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
/// JoinedCopies - Keep track of copies eliminated due to coalescing.
@@ -64,7 +63,7 @@ namespace llvm {
public:
    static char ID; // Pass identification, replacement for typeid
- SimpleRegisterCoalescing() : MachineFunctionPass(&ID) {}
+ SimpleRegisterCoalescing() : MachineFunctionPass(ID) {}
struct InstrSlots {
enum {
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index e90869d600dd6..b637980f885c3 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -58,7 +58,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit SjLjEHPass(const TargetLowering *tli = NULL)
- : FunctionPass(&ID), TLI(tli) { }
+ : FunctionPass(ID), TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 7a227cf02d57d..1bc148f160bc8 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -40,7 +40,8 @@ namespace {
}
char SlotIndexes::ID = 0;
-static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering");
+INITIALIZE_PASS(SlotIndexes, "slotindexes",
+ "Slot index numbering", false, false);
IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
return &*IndexListEntryEmptyKey;
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 56bcb2824ae8a..59d5ab33c994f 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -49,29 +50,31 @@ namespace {
/// Utility class for spillers.
class SpillerBase : public Spiller {
protected:
+ MachineFunctionPass *pass;
MachineFunction *mf;
+ VirtRegMap *vrm;
LiveIntervals *lis;
MachineFrameInfo *mfi;
MachineRegisterInfo *mri;
const TargetInstrInfo *tii;
const TargetRegisterInfo *tri;
- VirtRegMap *vrm;
/// Construct a spiller base.
- SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
- : mf(mf), lis(lis), vrm(vrm)
+ SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+ : pass(&pass), mf(&mf), vrm(&vrm)
{
- mfi = mf->getFrameInfo();
- mri = &mf->getRegInfo();
- tii = mf->getTarget().getInstrInfo();
- tri = mf->getTarget().getRegisterInfo();
+ lis = &pass.getAnalysis<LiveIntervals>();
+ mfi = mf.getFrameInfo();
+ mri = &mf.getRegInfo();
+ tii = mf.getTarget().getInstrInfo();
+ tri = mf.getTarget().getRegisterInfo();
}
/// Add spill ranges for every use/def of the live interval, inserting loads
/// immediately before each use, and stores after each def. No folding or
/// remat is attempted.
void trivialSpillEverywhere(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals) {
+ SmallVectorImpl<LiveInterval*> &newIntervals) {
DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
@@ -173,13 +176,13 @@ namespace {
class TrivialSpiller : public SpillerBase {
public:
- TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
- : SpillerBase(mf, lis, vrm) {}
+ TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : SpillerBase(pass, mf, vrm) {}
void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &,
- SlotIndex*) {
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &) {
// Ignore spillIs - we don't use it.
trivialSpillEverywhere(li, newIntervals);
}
@@ -193,18 +196,19 @@ namespace {
class StandardSpiller : public Spiller {
protected:
LiveIntervals *lis;
- const MachineLoopInfo *loopInfo;
+ MachineLoopInfo *loopInfo;
VirtRegMap *vrm;
public:
- StandardSpiller(LiveIntervals *lis, const MachineLoopInfo *loopInfo,
- VirtRegMap *vrm)
- : lis(lis), loopInfo(loopInfo), vrm(vrm) {}
+ StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : lis(&pass.getAnalysis<LiveIntervals>()),
+ loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
+ vrm(&vrm) {}
/// Falls back on LiveIntervals::addIntervalsForSpills.
void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex*) {
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs) {
std::vector<LiveInterval*> added =
lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
newIntervals.insert(newIntervals.end(), added.begin(), added.end());
@@ -221,23 +225,21 @@ namespace {
/// then the spiller falls back on the standard spilling mechanism.
class SplittingSpiller : public StandardSpiller {
public:
- SplittingSpiller(MachineFunction *mf, LiveIntervals *lis,
- const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
- : StandardSpiller(lis, loopInfo, vrm) {
-
- mri = &mf->getRegInfo();
- tii = mf->getTarget().getInstrInfo();
- tri = mf->getTarget().getRegisterInfo();
+ SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+ VirtRegMap &vrm)
+ : StandardSpiller(pass, mf, vrm) {
+ mri = &mf.getRegInfo();
+ tii = mf.getTarget().getInstrInfo();
+ tri = mf.getTarget().getRegisterInfo();
}
void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestStart) {
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs) {
if (worthTryingToSplit(li))
- tryVNISplit(li, earliestStart);
+ tryVNISplit(li);
else
- StandardSpiller::spill(li, newIntervals, spillIs, earliestStart);
+ StandardSpiller::spill(li, newIntervals, spillIs);
}
private:
@@ -252,8 +254,7 @@ private:
}
/// Try to break a LiveInterval into its component values.
- std::vector<LiveInterval*> tryVNISplit(LiveInterval *li,
- SlotIndex *earliestStart) {
+ std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) {
DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
@@ -277,10 +278,6 @@ private:
DEBUG(dbgs() << *splitInterval << "\n");
added.push_back(splitInterval);
alreadySplit.insert(splitInterval);
- if (earliestStart != 0) {
- if (splitInterval->beginIndex() < *earliestStart)
- *earliestStart = splitInterval->beginIndex();
- }
} else {
DEBUG(dbgs() << "0\n");
}
@@ -293,10 +290,6 @@ private:
if (!li->empty()) {
added.push_back(li);
alreadySplit.insert(li);
- if (earliestStart != 0) {
- if (li->beginIndex() < *earliestStart)
- *earliestStart = li->beginIndex();
- }
}
return added;
@@ -506,20 +499,19 @@ private:
namespace llvm {
-Spiller *createInlineSpiller(MachineFunction*,
- LiveIntervals*,
- const MachineLoopInfo*,
- VirtRegMap*);
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
}
-llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
- const MachineLoopInfo *loopInfo,
- VirtRegMap *vrm) {
+llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm) {
switch (spillerOpt) {
default: assert(0 && "unknown spiller");
- case trivial: return new TrivialSpiller(mf, lis, vrm);
- case standard: return new StandardSpiller(lis, loopInfo, vrm);
- case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm);
- case inline_: return createInlineSpiller(mf, lis, loopInfo, vrm);
+ case trivial: return new TrivialSpiller(pass, mf, vrm);
+ case standard: return new StandardSpiller(pass, mf, vrm);
+ case splitting: return new SplittingSpiller(pass, mf, vrm);
+ case inline_: return createInlineSpiller(pass, mf, vrm);
}
}
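The constructor change threads the MachineFunctionPass through so each spiller can fetch its own analyses; the factory shape itself is ordinary. A stripped-down model with placeholder Pass/Func/RegMap types (only the trivial spiller is modeled):

#include <memory>
#include <stdexcept>

// Placeholders standing in for MachineFunctionPass, MachineFunction, and
// VirtRegMap.
struct Pass {};
struct Func {};
struct RegMap {};

struct Spiller {
  virtual ~Spiller() = default;
  virtual void spill() = 0;
};

struct TrivialSpiller : Spiller {
  TrivialSpiller(Pass &, Func &, RegMap &) {}  // would pull analyses via Pass
  void spill() override {}
};

enum class SpillerKind { Trivial, Standard };

// Mirror of createSpiller: switch on the selected kind and forward the same
// three references to whichever implementation is chosen.
std::unique_ptr<Spiller> createSpiller(SpillerKind K, Pass &P, Func &F,
                                       RegMap &V) {
  if (K == SpillerKind::Trivial)
    return std::make_unique<TrivialSpiller>(P, F, V);
  throw std::runtime_error("unknown spiller");
}

int main() {
  Pass P; Func F; RegMap V;
  createSpiller(SpillerKind::Trivial, P, F, V)->spill();
}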
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 450447b3933a8..59bc0ec6ae70f 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -11,19 +11,14 @@
#define LLVM_CODEGEN_SPILLER_H
#include "llvm/ADT/SmallVector.h"
-#include <vector>
namespace llvm {
class LiveInterval;
- class LiveIntervals;
- class LiveStacks;
class MachineFunction;
- class MachineInstr;
- class MachineLoopInfo;
+ class MachineFunctionPass;
class SlotIndex;
class VirtRegMap;
- class VNInfo;
/// Spiller interface.
///
@@ -40,18 +35,16 @@ namespace llvm {
/// @param spillIs A list of intervals that are about to be spilled,
/// and so cannot be used for remat etc.
/// @param newIntervals The newly created intervals will be appended here.
- /// @param earliestIndex The earliest point for splitting. (OK, it's another
- /// pointer to the allocator guts).
virtual void spill(LiveInterval *li,
- std::vector<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex = 0) = 0;
+ SmallVectorImpl<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs) = 0;
};
/// Create and return a spiller object, as specified on the command line.
- Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li,
- const MachineLoopInfo *loopInfo, VirtRegMap *vrm);
+ Spiller* createSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
}
#endif
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
new file mode 100644
index 0000000000000..29474f0d55121
--- /dev/null
+++ b/lib/CodeGen/SplitKit.cpp
@@ -0,0 +1,1097 @@
+//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "splitter"
+#include "SplitKit.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+AllowSplit("spiller-splits-edges",
+ cl::desc("Allow critical edge splitting during spilling"));
+
+//===----------------------------------------------------------------------===//
+// Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const MachineFunction &mf,
+ const LiveIntervals &lis,
+ const MachineLoopInfo &mli)
+ : mf_(mf),
+ lis_(lis),
+ loops_(mli),
+ tii_(*mf.getTarget().getInstrInfo()),
+ curli_(0) {}
+
+void SplitAnalysis::clear() {
+ usingInstrs_.clear();
+ usingBlocks_.clear();
+ usingLoops_.clear();
+ curli_ = 0;
+}
+
+bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) {
+ MachineBasicBlock *T, *F;
+ SmallVector<MachineOperand, 4> Cond;
+ return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
+}
+
+/// analyzeUses - Count instructions, basic blocks, and loops using curli.
+void SplitAnalysis::analyzeUses() {
+ const MachineRegisterInfo &MRI = mf_.getRegInfo();
+ for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg);
+ MachineInstr *MI = I.skipInstruction();) {
+ if (MI->isDebugValue() || !usingInstrs_.insert(MI))
+ continue;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (usingBlocks_[MBB]++)
+ continue;
+ if (MachineLoop *Loop = loops_.getLoopFor(MBB))
+ usingLoops_[Loop]++;
+ }
+ DEBUG(dbgs() << " counted "
+ << usingInstrs_.size() << " instrs, "
+ << usingBlocks_.size() << " blocks, "
+ << usingLoops_.size() << " loops.\n");
+}
+
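analyzeUses is a three-level counting pass: each instruction once, each block once per instruction, each loop once per using block. The same bookkeeping with standard containers and toy Instr/Block/Loop stand-ins:

#include <cstdio>
#include <map>
#include <set>
#include <vector>

struct Loop {};                          // stand-in for MachineLoop
struct Block { Loop *L; };               // stand-in for MachineBasicBlock
struct Instr { Block *Parent; };         // stand-in for MachineInstr

int main() {
  Loop L;
  Block B1{&L}, B2{nullptr};
  std::vector<Instr> Uses = {{&B1}, {&B1}, {&B2}};

  std::set<const Instr*> UsingInstrs;
  std::map<const Block*, unsigned> UsingBlocks;  // use count per block
  std::map<const Loop*, unsigned> UsingLoops;    // using-block count per loop

  for (const Instr &MI : Uses) {
    if (!UsingInstrs.insert(&MI).second)
      continue;                                  // already counted
    if (UsingBlocks[MI.Parent]++)                // not the first use in block
      continue;
    if (Loop *Lp = MI.Parent->L)
      ++UsingLoops[Lp];
  }
  std::printf("%zu instrs, %zu blocks, %zu loops\n",
              UsingInstrs.size(), UsingBlocks.size(), UsingLoops.size());
}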
+/// removeUse - Update statistics by noting that MI no longer uses curli.
+void SplitAnalysis::removeUse(const MachineInstr *MI) {
+ if (!usingInstrs_.erase(MI))
+ return;
+
+ // Decrement MBB count.
+ const MachineBasicBlock *MBB = MI->getParent();
+ BlockCountMap::iterator bi = usingBlocks_.find(MBB);
+ assert(bi != usingBlocks_.end() && "MBB missing");
+ assert(bi->second && "0 count in map");
+ if (--bi->second)
+ return;
+ // No more uses in MBB.
+ usingBlocks_.erase(bi);
+
+ // Decrement loop count.
+ MachineLoop *Loop = loops_.getLoopFor(MBB);
+ if (!Loop)
+ return;
+ LoopCountMap::iterator li = usingLoops_.find(Loop);
+ assert(li != usingLoops_.end() && "Loop missing");
+ assert(li->second && "0 count in map");
+ if (--li->second)
+ return;
+ // No more blocks in Loop.
+ usingLoops_.erase(li);
+}
+
+// Get three sets of basic blocks surrounding a loop: Blocks inside the loop,
+// predecessor blocks, and exit blocks.
+void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) {
+ Blocks.clear();
+
+ // Blocks in the loop.
+ Blocks.Loop.insert(Loop->block_begin(), Loop->block_end());
+
+ // Predecessor blocks.
+ const MachineBasicBlock *Header = Loop->getHeader();
+ for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(),
+ E = Header->pred_end(); I != E; ++I)
+ if (!Blocks.Loop.count(*I))
+ Blocks.Preds.insert(*I);
+
+ // Exit blocks.
+ for (MachineLoop::block_iterator I = Loop->block_begin(),
+ E = Loop->block_end(); I != E; ++I) {
+ const MachineBasicBlock *MBB = *I;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI)
+ if (!Blocks.Loop.count(*SI))
+ Blocks.Exits.insert(*SI);
+ }
+}
+
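getLoopBlocks partitions the CFG around a loop into body, predecessor, and exit sets. The same set arithmetic over a toy adjacency-list CFG (not the MachineLoop API):

#include <cstdio>
#include <set>
#include <vector>

struct Block {
  int Id;
  std::vector<Block*> Preds, Succs;
};

// Classify blocks around a loop: header predecessors outside the body, and
// successors of body blocks outside the body (the exits).
void getLoopBlocks(const std::set<Block*> &Body, Block *Header,
                   std::set<Block*> &Preds, std::set<Block*> &Exits) {
  for (Block *P : Header->Preds)
    if (!Body.count(P))
      Preds.insert(P);
  for (Block *B : Body)
    for (Block *S : B->Succs)
      if (!Body.count(S))
        Exits.insert(S);
}

int main() {
  Block Entry{0}, Head{1}, Latch{2}, Exit{3};
  Entry.Succs = {&Head};        Head.Preds = {&Entry, &Latch};
  Head.Succs = {&Latch, &Exit}; Latch.Preds = {&Head};
  Latch.Succs = {&Head};        Exit.Preds = {&Head};
  std::set<Block*> Body = {&Head, &Latch}, Preds, Exits;
  getLoopBlocks(Body, &Head, Preds, Exits);
  std::printf("%zu preds, %zu exits\n", Preds.size(), Exits.size()); // 1, 1
}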
+/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
+/// and around the Loop.
+SplitAnalysis::LoopPeripheralUse SplitAnalysis::
+analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) {
+ LoopPeripheralUse use = ContainedInLoop;
+ for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
+ I != E; ++I) {
+ const MachineBasicBlock *MBB = I->first;
+ // Is this a peripheral block?
+ if (use < MultiPeripheral &&
+ (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) {
+ if (I->second > 1) use = MultiPeripheral;
+ else use = SinglePeripheral;
+ continue;
+ }
+ // Is it a loop block?
+ if (Blocks.Loop.count(MBB))
+ continue;
+ // It must be an unrelated block.
+ return OutsideLoop;
+ }
+ return use;
+}
+
+/// getCriticalExits - It may be necessary to partially break critical edges
+/// leaving the loop if an exit block has phi uses of curli. Collect the exit
+/// blocks that need special treatment into CriticalExits.
+void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
+ BlockPtrSet &CriticalExits) {
+ CriticalExits.clear();
+
+ // A critical exit block contains a phi def of curli, and has a predecessor
+ // that is not in the loop nor a loop predecessor.
+ // For such an exit block, the edges carrying the new variable must be moved
+ // to a new pre-exit block.
+ for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end();
+ I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ);
+ VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx);
+ // This exit may not have curli live in at all. No need to split.
+ if (!SuccVNI)
+ continue;
+ // If this is not a PHI def, it is either using a value from before the
+ // loop, or a value defined inside the loop. Both are safe.
+ if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx)
+ continue;
+ // This exit block does have a PHI. Does it also have a predecessor that is
+ // not a loop block or loop predecessor?
+ for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
+ PE = Succ->pred_end(); PI != PE; ++PI) {
+ const MachineBasicBlock *Pred = *PI;
+ if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred))
+ continue;
+ // This is a critical exit block, and we need to split the exit edge.
+ CriticalExits.insert(Succ);
+ break;
+ }
+ }
+}
+
+/// canSplitCriticalExits - Return true if it is possible to insert new exit
+/// blocks before the blocks in CriticalExits.
+bool
+SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
+ BlockPtrSet &CriticalExits) {
+ // If we don't allow critical edge splitting, require no critical exits.
+ if (!AllowSplit)
+ return CriticalExits.empty();
+
+ for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end();
+ I != E; ++I) {
+ const MachineBasicBlock *Succ = *I;
+ // We want to insert a new pre-exit MBB before Succ, and change all the
+ // in-loop blocks to branch to the pre-exit instead of Succ.
+ // Check that all the in-loop predecessors can be changed.
+ for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
+ PE = Succ->pred_end(); PI != PE; ++PI) {
+ const MachineBasicBlock *Pred = *PI;
+ // The external predecessors won't be altered.
+ if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred))
+ continue;
+ if (!canAnalyzeBranch(Pred))
+ return false;
+ }
+
+ // If Succ's layout predecessor falls through, that too must be analyzable.
+ // We need to insert the pre-exit block in the gap.
+ MachineFunction::const_iterator MFI = Succ;
+ if (MFI == mf_.begin())
+ continue;
+ if (!canAnalyzeBranch(--MFI))
+ return false;
+ }
+ // No problems found.
+ return true;
+}
+
+void SplitAnalysis::analyze(const LiveInterval *li) {
+ clear();
+ curli_ = li;
+ analyzeUses();
+}
+
+const MachineLoop *SplitAnalysis::getBestSplitLoop() {
+ assert(curli_ && "Call analyze() before getBestSplitLoop");
+ if (usingLoops_.empty())
+ return 0;
+
+ LoopPtrSet Loops, SecondLoops;
+ LoopBlocks Blocks;
+ BlockPtrSet CriticalExits;
+
+ // Find first-class and second class candidate loops.
+ // We prefer to split around loops where curli is used outside the periphery.
+ for (LoopCountMap::const_iterator I = usingLoops_.begin(),
+ E = usingLoops_.end(); I != E; ++I) {
+ const MachineLoop *Loop = I->first;
+ getLoopBlocks(Loop, Blocks);
+
+ // FIXME: We need an SSA updater to properly handle multiple exit blocks.
+ if (Blocks.Exits.size() > 1) {
+ DEBUG(dbgs() << " multiple exits from " << *Loop);
+ continue;
+ }
+
+ LoopPtrSet *LPS = 0;
+ switch(analyzeLoopPeripheralUse(Blocks)) {
+ case OutsideLoop:
+ LPS = &Loops;
+ break;
+ case MultiPeripheral:
+ LPS = &SecondLoops;
+ break;
+ case ContainedInLoop:
+ DEBUG(dbgs() << " contained in " << *Loop);
+ continue;
+ case SinglePeripheral:
+ DEBUG(dbgs() << " single peripheral use in " << *Loop);
+ continue;
+ }
+ // Will it be possible to split around this loop?
+ getCriticalExits(Blocks, CriticalExits);
+ DEBUG(dbgs() << " " << CriticalExits.size() << " critical exits from "
+ << *Loop);
+ if (!canSplitCriticalExits(Blocks, CriticalExits))
+ continue;
+ // This is a possible split.
+ assert(LPS);
+ LPS->insert(Loop);
+ }
+
+ DEBUG(dbgs() << " getBestSplitLoop found " << Loops.size() << " + "
+ << SecondLoops.size() << " candidate loops.\n");
+
+ // If there are no first class loops available, look at second class loops.
+ if (Loops.empty())
+ Loops = SecondLoops;
+
+ if (Loops.empty())
+ return 0;
+
+ // Pick the earliest loop.
+ // FIXME: Are there other heuristics to consider?
+ const MachineLoop *Best = 0;
+ SlotIndex BestIdx;
+ for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E;
+ ++I) {
+ SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader());
+ if (!Best || Idx < BestIdx)
+ Best = *I, BestIdx = Idx;
+ }
+ DEBUG(dbgs() << " getBestSplitLoop found " << *Best);
+ return Best;
+}
+
+/// getMultiUseBlocks - If curli has more than one use in a basic block, it
+/// may be advantageous to split curli for the duration of that block.
+bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
+  // If curli is local to one block, there is no point in splitting it.
+ if (usingBlocks_.size() <= 1)
+ return false;
+ // Add blocks with multiple uses.
+ for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
+ I != E; ++I)
+ switch (I->second) {
+ case 0:
+ case 1:
+ continue;
+ case 2: {
+ // It doesn't pay to split a 2-instr block if it redefines curli.
+ VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first));
+ VNInfo *VN2 =
+ curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex());
+ // live-in and live-out with a different value.
+ if (VN1 && VN2 && VN1 != VN2)
+ continue;
+ } // Fall through.
+ default:
+ Blocks.insert(I->first);
+ }
+ return !Blocks.empty();
+}
+
+//===----------------------------------------------------------------------===//
+// LiveIntervalMap
+//===----------------------------------------------------------------------===//
+
+// defValue - Introduce a li_ def for ParentVNI that could be later than
+// ParentVNI->def.
+VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+
+ // Is this a simple 1-1 mapping? Not likely.
+ if (Idx == ParentVNI->def)
+ return mapValue(ParentVNI, Idx);
+
+ // This is a complex def. Mark with a NULL in valueMap.
+ VNInfo *OldVNI =
+ valueMap_.insert(
+ ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second;
+ // The static_cast<VNInfo *> is only needed to work around a bug in an
+ // old version of the C++0x standard which the following compilers
+ // implemented and have yet to fix:
+ //
+ // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
+ // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
+ //
+ // If/When we move to C++0x, this can be replaced by nullptr.
+ (void)OldVNI;
+ assert(OldVNI == 0 && "Simple/Complex values mixed");
+
+ // Should we insert a minimal snippet of VNI LiveRange, or can we count on
+ // callers to do that? We need it for lookups of complex values.
+ VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator());
+ return VNI;
+}
+
+// mapValue - Find the mapped value for ParentVNI at Idx.
+// Potentially create phi-def values.
+VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+ assert(ParentVNI && "Mapping NULL value");
+ assert(Idx.isValid() && "Invalid SlotIndex");
+ assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator,bool> InsP =
+ valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0)));
+ // The static_cast<VNInfo *> is only needed to work around a bug in an
+ // old version of the C++0x standard which the following compilers
+ // implemented and have yet to fix:
+ //
+ // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
+ // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
+ //
+ // If/When we move to C++0x, this can be replaced by nullptr.
+
+ // This was an unknown value. Create a simple mapping.
+ if (InsP.second)
+ return InsP.first->second = li_.createValueCopy(ParentVNI,
+ lis_.getVNInfoAllocator());
+ // This was a simple mapped value.
+ if (InsP.first->second)
+ return InsP.first->second;
+
+ // This is a complex mapped value. There may be multiple defs, and we may need
+ // to create phi-defs.
+ MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx);
+ assert(IdxMBB && "No MBB at Idx");
+
+ // Is there a def in the same MBB we can extend?
+ if (VNInfo *VNI = extendTo(IdxMBB, Idx))
+ return VNI;
+
+ // Now for the fun part. We know that ParentVNI potentially has multiple defs,
+ // and we may need to create even more phi-defs to preserve VNInfo SSA form.
+ // Perform a depth-first search for predecessor blocks where we know the
+ // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB.
+
+ // Track MBBs where we have created or learned the dominating value.
+ // This may change during the DFS as we create new phi-defs.
+ typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap;
+ MBBValueMap DomValue;
+
+ for (idf_iterator<MachineBasicBlock*>
+ IDFI = idf_begin(IdxMBB),
+ IDFE = idf_end(IdxMBB); IDFI != IDFE;) {
+ MachineBasicBlock *MBB = *IDFI;
+ SlotIndex End = lis_.getMBBEndIdx(MBB);
+
+ // We are operating on the restricted CFG where ParentVNI is live.
+ if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) {
+ IDFI.skipChildren();
+ continue;
+ }
+
+ // Do we have a dominating value in this block?
+ VNInfo *VNI = extendTo(MBB, End);
+ if (!VNI) {
+ ++IDFI;
+ continue;
+ }
+
+ // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs
+ // as needed along the way.
+ for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) {
+ // Start from MBB's immediate successor. End at IdxMBB.
+ MachineBasicBlock *Succ = IDFI.getPath(PI-1);
+ std::pair<MBBValueMap::iterator, bool> InsP =
+ DomValue.insert(MBBValueMap::value_type(Succ, VNI));
+
+ // This is the first time we backtrack to Succ.
+ if (InsP.second)
+ continue;
+
+ // We reached Succ again with the same VNI. Nothing is going to change.
+ VNInfo *OVNI = InsP.first->second;
+ if (OVNI == VNI)
+ break;
+
+ // Succ already has a phi-def. No need to continue.
+ SlotIndex Start = lis_.getMBBStartIdx(Succ);
+ if (OVNI->def == Start)
+ break;
+
+ // We have a collision between the old and new VNI at Succ. That means
+ // neither dominates and we need a new phi-def.
+ VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ InsP.first->second = VNI;
+
+ // Replace OVNI with VNI in the remaining path.
+ for (; PI > 1 ; --PI) {
+ MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2));
+ if (I == DomValue.end() || I->second != OVNI)
+ break;
+ I->second = VNI;
+ }
+ }
+
+ // No need to search the children, we found a dominating value.
+ IDFI.skipChildren();
+ }
+
+ // The search should at least find a dominating value for IdxMBB.
+ assert(!DomValue.empty() && "Couldn't find a reaching definition");
+
+ // Since we went through the trouble of a full DFS visiting all reaching defs,
+ // the values in DomValue are now accurate. No more phi-defs are needed for
+ // these blocks, so we can color the live ranges.
+ // This makes the next mapValue call much faster.
+ VNInfo *IdxVNI = 0;
+ for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E;
+ ++I) {
+ MachineBasicBlock *MBB = I->first;
+ VNInfo *VNI = I->second;
+ SlotIndex Start = lis_.getMBBStartIdx(MBB);
+ if (MBB == IdxMBB) {
+ // Don't add full liveness to IdxMBB, stop at Idx.
+ if (Start != Idx)
+ li_.addRange(LiveRange(Start, Idx, VNI));
+ // The caller had better add some liveness to IdxVNI, or it leaks.
+ IdxVNI = VNI;
+ } else
+ li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+ }
+
+ assert(IdxVNI && "Didn't find value for Idx");
+ return IdxVNI;
+}
+
+// extendTo - Find the last li_ value defined in MBB at or before Idx. The
+// parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+// Return the found VNInfo, or NULL.
+VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) {
+ LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx);
+ if (I == li_.begin())
+ return 0;
+ --I;
+ if (I->start < lis_.getMBBStartIdx(MBB))
+ return 0;
+ if (I->end < Idx)
+ I->end = Idx;
+ return I->valno;
+}
+
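extendTo relies on live ranges being kept sorted: std::upper_bound locates the first range starting after Idx, and the entry just before it is the candidate to extend. The same pattern over a plain sorted vector of half-open ranges:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Range { unsigned Start, End; };   // half-open [Start, End)

// Find the last range starting at or before Idx and extend it to cover Idx,
// mirroring LiveIntervalMap::extendTo. Returns false if no such range.
bool extendTo(std::vector<Range> &Ranges, unsigned Idx) {
  auto I = std::upper_bound(Ranges.begin(), Ranges.end(), Idx,
                            [](unsigned V, const Range &R) {
                              return V < R.Start;
                            });
  if (I == Ranges.begin())
    return false;                        // everything starts after Idx
  --I;
  if (I->End < Idx)
    I->End = Idx;                        // grow the range up to Idx
  return true;
}

int main() {
  std::vector<Range> Ranges = {{0, 4}, {10, 12}};
  extendTo(Ranges, 20);                  // extends {10,12} to {10,20}
  std::printf("[%u,%u)\n", Ranges.back().Start, Ranges.back().End);
}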
+// addSimpleRange - Add a simple range from parentli_ to li_.
+// ParentVNI must be live in the [Start;End) interval.
+void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End,
+ const VNInfo *ParentVNI) {
+ VNInfo *VNI = mapValue(ParentVNI, Start);
+  // A simple mapping is easy.
+ if (VNI->def == ParentVNI->def) {
+ li_.addRange(LiveRange(Start, End, VNI));
+ return;
+ }
+
+ // ParentVNI is a complex value. We must map per MBB.
+ MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start);
+ MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End);
+
+ if (MBB == MBBE) {
+ li_.addRange(LiveRange(Start, End, VNI));
+ return;
+ }
+
+ // First block.
+ li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+
+ // Run sequence of full blocks.
+ for (++MBB; MBB != MBBE; ++MBB) {
+ Start = lis_.getMBBStartIdx(MBB);
+ li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB),
+ mapValue(ParentVNI, Start)));
+ }
+
+ // Final block.
+ Start = lis_.getMBBStartIdx(MBB);
+ if (Start != End)
+ li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
+}
+
+/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+/// All needed values whose def is not inside [Start;End) must be defined
+/// beforehand so mapValue will work.
+void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
+ LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end();
+ LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+
+ // Check if --I begins before Start and overlaps.
+ if (I != B) {
+ --I;
+ if (I->end > Start)
+ addSimpleRange(Start, std::min(End, I->end), I->valno);
+ ++I;
+ }
+
+ // The remaining ranges begin after Start.
+ for (;I != E && I->start < End; ++I)
+ addSimpleRange(I->start, std::min(End, I->end), I->valno);
+}
+
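addRange intersects [Start;End) with the parent interval: a lower_bound to find the first candidate, one backward peek for a range that begins earlier but still overlaps, then a forward scan. A generic version over plain ranges:

#include <algorithm>
#include <cstdio>
#include <iterator>
#include <vector>

struct Range { unsigned Start, End; };   // half-open [Start, End)

// Visit every piece of [Start, End) that intersects a sorted, disjoint range
// list -- the same shape as LiveIntervalMap::addRange above.
template <class Fn>
void forEachOverlap(const std::vector<Range> &Ranges,
                    unsigned Start, unsigned End, Fn Visit) {
  auto B = Ranges.begin(), E = Ranges.end();
  auto I = std::lower_bound(B, E, Start,
                            [](const Range &R, unsigned V) {
                              return R.Start < V;
                            });
  if (I != B) {                          // check the range just before Start
    auto P = std::prev(I);
    if (P->End > Start)
      Visit(Start, std::min(End, P->End));
  }
  for (; I != E && I->Start < End; ++I)  // ranges beginning inside the query
    Visit(I->Start, std::min(End, I->End));
}

int main() {
  std::vector<Range> Parent = {{0, 5}, {8, 12}, {20, 25}};
  forEachOverlap(Parent, 3, 22, [](unsigned S, unsigned E) {
    std::printf("[%u,%u) ", S, E);       // prints [3,5) [8,12) [20,22)
  });
  std::printf("\n");
}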
+//===----------------------------------------------------------------------===//
+// Split Editor
+//===----------------------------------------------------------------------===//
+
+/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
+ SmallVectorImpl<LiveInterval*> &intervals)
+ : sa_(sa), lis_(lis), vrm_(vrm),
+ mri_(vrm.getMachineFunction().getRegInfo()),
+ tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+ curli_(sa_.getCurLI()),
+ dupli_(0), openli_(0),
+ intervals_(intervals),
+ firstInterval(intervals_.size())
+{
+ assert(curli_ && "SplitEditor created from empty SplitAnalysis");
+
+ // Make sure curli_ is assigned a stack slot, so all our intervals get the
+ // same slot as curli_.
+ if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT)
+ vrm_.assignVirt2StackSlot(curli_->reg);
+
+}
+
+LiveInterval *SplitEditor::createInterval() {
+ unsigned curli = sa_.getCurLI()->reg;
+ unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli));
+ LiveInterval &Intv = lis_.getOrCreateInterval(Reg);
+ vrm_.grow();
+ vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli));
+ return &Intv;
+}
+
+LiveInterval *SplitEditor::getDupLI() {
+ if (!dupli_) {
+ // Create an interval for dupli that is a copy of curli.
+ dupli_ = createInterval();
+ dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator());
+ }
+ return dupli_;
+}
+
+VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) {
+ VNInfo *&VNI = valueMap_[curliVNI];
+ if (!VNI)
+ VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator());
+ return VNI;
+}
+
+/// Insert a COPY instruction curli -> li. Allocate a new value from li
+/// defined by the COPY. Note that rewrite() will deal with the curli
+/// register, so this function can be used to copy from any interval - openli,
+/// curli, or dupli.
+VNInfo *SplitEditor::insertCopy(LiveInterval &LI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY),
+ LI.reg).addReg(curli_->reg);
+ SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+ return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator());
+}
+
+/// Create a new virtual register and live interval.
+void SplitEditor::openIntv() {
+ assert(!openli_ && "Previous LI not closed before openIntv");
+ openli_ = createInterval();
+ intervals_.push_back(openli_);
+ liveThrough_ = false;
+}
+
+/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
+/// not live before Idx, a COPY is not inserted.
+void SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(openli_ && "openIntv not called before enterIntvBefore");
+
+ // Copy from curli_ if it is live.
+ if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) {
+ MachineInstr *MI = lis_.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
+ VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI);
+ openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI));
+
+ // Make sure CurVNI is properly mapped.
+ VNInfo *&mapVNI = valueMap_[CurVNI];
+    // We don't have SSA update yet, so only one entry per value is allowed.
+ assert(!mapVNI && "enterIntvBefore called more than once for the same value");
+ mapVNI = VNI;
+ }
+ DEBUG(dbgs() << " enterIntvBefore " << Idx << ": " << *openli_ << '\n');
+}
+
+/// enterIntvAtEnd - Enter openli at the end of MBB.
+/// PhiMBB is a successor inside openli where a PHI value is created.
+/// Currently, all entries must share the same PhiMBB.
+void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) {
+ assert(openli_ && "openIntv not called before enterIntvAtEnd");
+
+ SlotIndex EndA = lis_.getMBBEndIdx(&A);
+ VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex());
+ if (!CurVNIA) {
+ DEBUG(dbgs() << " enterIntvAtEnd, curli not live out of BB#"
+ << A.getNumber() << ".\n");
+ return;
+ }
+
+ // Add a phi kill value and live range out of A.
+ VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator());
+ openli_->addRange(LiveRange(VNIA->def, EndA, VNIA));
+
+ // FIXME: If this is the only entry edge, we don't need the extra PHI value.
+ // FIXME: If there are multiple entry blocks (so not a loop), we need proper
+ // SSA update.
+
+ // Now look at the start of B.
+ SlotIndex StartB = lis_.getMBBStartIdx(&B);
+ SlotIndex EndB = lis_.getMBBEndIdx(&B);
+ const LiveRange *CurB = curli_->getLiveRangeContaining(StartB);
+ if (!CurB) {
+ DEBUG(dbgs() << " enterIntvAtEnd: curli not live in to BB#"
+ << B.getNumber() << ".\n");
+ return;
+ }
+
+ VNInfo *VNIB = openli_->getVNInfoAt(StartB);
+ if (!VNIB) {
+ // Create a phi value.
+ VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false,
+ lis_.getVNInfoAllocator());
+ VNIB->setIsPHIDef(true);
+ VNInfo *&mapVNI = valueMap_[CurB->valno];
+ if (mapVNI) {
+ // Multiple copies - must create PHI value.
+ abort();
+ } else {
+ // This is the first copy of dupLR. Mark the mapping.
+ mapVNI = VNIB;
+ }
+
+ }
+
+ DEBUG(dbgs() << " enterIntvAtEnd: " << *openli_ << '\n');
+}
+
+/// useIntv - indicate that all instructions in MBB should use openli.
+void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
+ useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB));
+}
+
+void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
+ assert(openli_ && "openIntv not called before useIntv");
+
+ // Map the curli values from the interval into openli_
+ LiveInterval::const_iterator B = curli_->begin(), E = curli_->end();
+ LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+
+ if (I != B) {
+ --I;
+ // I begins before Start, but overlaps.
+ if (I->end > Start)
+ openli_->addRange(LiveRange(Start, std::min(End, I->end),
+ mapValue(I->valno)));
+ ++I;
+ }
+
+ // The remaining ranges begin after Start.
+ for (;I != E && I->start < End; ++I)
+ openli_->addRange(LiveRange(I->start, std::min(End, I->end),
+ mapValue(I->valno)));
+ DEBUG(dbgs() << " use [" << Start << ';' << End << "): " << *openli_
+ << '\n');
+}
+
+/// leaveIntvAfter - Leave openli after the instruction at Idx.
+void SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(openli_ && "openIntv not called before leaveIntvAfter");
+
+ const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex());
+ if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) {
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": not live\n");
+ return;
+ }
+
+ // Was this value of curli live through openli?
+ if (!openli_->liveAt(CurLR->valno->def)) {
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": using external value\n");
+ liveThrough_ = true;
+ return;
+ }
+
+ // We are going to insert a back copy, so we must have a dupli_.
+ LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex());
+  assert(DupLR && "dupli not live into block, but curli is?");
+
+ // Insert the COPY instruction.
+ MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx);
+ MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(),
+ tii_.get(TargetOpcode::COPY), dupli_->reg)
+ .addReg(openli_->reg);
+ SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+ openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx,
+ mapValue(CurLR->valno)));
+ DupLR->valno->def = CopyIdx;
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": " << *openli_ << '\n');
+}
+
+/// leaveIntvAtTop - Leave the interval at the top of MBB.
+/// Currently, only one value can leave the interval.
+void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(openli_ && "openIntv not called before leaveIntvAtTop");
+
+ SlotIndex Start = lis_.getMBBStartIdx(&MBB);
+ const LiveRange *CurLR = curli_->getLiveRangeContaining(Start);
+
+ // Is curli even live-in to MBB?
+ if (!CurLR) {
+ DEBUG(dbgs() << " leaveIntvAtTop at " << Start << ": not live\n");
+ return;
+ }
+
+ // Is curli defined by PHI at the beginning of MBB?
+ bool isPHIDef = CurLR->valno->isPHIDef() &&
+ CurLR->valno->def.getBaseIndex() == Start;
+
+ // If MBB is using a value of curli that was defined outside the openli range,
+ // we don't want to copy it back here.
+ if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) {
+ DEBUG(dbgs() << " leaveIntvAtTop at " << Start
+ << ": using external value\n");
+ liveThrough_ = true;
+ return;
+ }
+
+ // We are going to insert a back copy, so we must have a dupli_.
+ LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start);
+  assert(DupLR && "dupli not live into block, but curli is?");
+
+ // Insert the COPY instruction.
+ MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(),
+ tii_.get(TargetOpcode::COPY), dupli_->reg)
+ .addReg(openli_->reg);
+ SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+
+ // Adjust dupli and openli values.
+ if (isPHIDef) {
+ // dupli was already a PHI on entry to MBB. Simply insert an openli PHI,
+ // and shift the dupli def down to the COPY.
+ VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
+ lis_.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ openli_->addRange(LiveRange(VNI->def, Idx, VNI));
+
+ dupli_->removeRange(Start, Idx);
+ DupLR->valno->def = Idx;
+ DupLR->valno->setIsPHIDef(false);
+ } else {
+ // The dupli value was defined somewhere inside the openli range.
+ DEBUG(dbgs() << " leaveIntvAtTop source value defined at "
+ << DupLR->valno->def << "\n");
+ // FIXME: We may not need a PHI here if all predecessors have the same
+ // value.
+ VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
+ lis_.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ openli_->addRange(LiveRange(VNI->def, Idx, VNI));
+
+ // FIXME: What if DupLR->valno is used by multiple exits? SSA Update.
+
+ // closeIntv is going to remove the superfluous live ranges.
+ DupLR->valno->def = Idx;
+ DupLR->valno->setIsPHIDef(false);
+ }
+
+ DEBUG(dbgs() << " leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n');
+}
+
+/// closeIntv - Indicate that we are done editing the currently open
+/// LiveInterval, and ranges can be trimmed.
+void SplitEditor::closeIntv() {
+ assert(openli_ && "openIntv not called before closeIntv");
+
+ DEBUG(dbgs() << " closeIntv cleaning up\n");
+ DEBUG(dbgs() << " open " << *openli_ << '\n');
+
+ if (liveThrough_) {
+ DEBUG(dbgs() << " value live through region, leaving dupli as is.\n");
+ } else {
+    // Live out with copies inserted, or killed by the region. Either way we
+    // need to remove the overlapping region from dupli.
+ getDupLI();
+ for (LiveInterval::iterator I = openli_->begin(), E = openli_->end();
+ I != E; ++I) {
+ dupli_->removeRange(I->start, I->end);
+ }
+    // FIXME: A block branching to the entry block may also branch to other
+    // blocks where curli is live. We need both openli and curli to be live
+    // in that case.
+ DEBUG(dbgs() << " dup2 " << *dupli_ << '\n');
+ }
+ openli_ = 0;
+ valueMap_.clear();
+}
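+
+// An illustrative worked example, not part of this patch: if openli covers
+// [100;140) and dupli covered [80;200), closeIntv trims dupli to [80;100) and
+// [140;200), since the region in between is now carried by openli.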
+
+/// rewrite - after all the new live ranges have been created, rewrite
+/// instructions using curli to use the new intervals.
+void SplitEditor::rewrite() {
+ assert(!openli_ && "Previous LI not closed before rewrite");
+ const LiveInterval *curli = sa_.getCurLI();
+ for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg),
+ RE = mri_.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ if (MI->isDebugValue()) {
+ DEBUG(dbgs() << "Zapping " << *MI);
+ // FIXME: We can do much better with debug values.
+ MO.setReg(0);
+ continue;
+ }
+ SlotIndex Idx = lis_.getInstructionIndex(MI);
+ Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ LiveInterval *LI = dupli_;
+ for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
+ LiveInterval *testli = intervals_[i];
+ if (testli->liveAt(Idx)) {
+ LI = testli;
+ break;
+ }
+ }
+ if (LI) {
+ MO.setReg(LI->reg);
+ sa_.removeUse(MI);
+ DEBUG(dbgs() << " rewrite " << Idx << '\t' << *MI);
+ }
+ }
+
+ // dupli_ goes in last, after rewriting.
+ if (dupli_) {
+ if (dupli_->empty()) {
+ DEBUG(dbgs() << " dupli became empty?\n");
+ lis_.removeInterval(dupli_->reg);
+ dupli_ = 0;
+ } else {
+ dupli_->RenumberValues(lis_);
+ intervals_.push_back(dupli_);
+ }
+ }
+
+ // Calculate spill weight and allocation hints for new intervals.
+ VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_);
+ for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
+ LiveInterval &li = *intervals_[i];
+ vrai.CalculateRegClass(li.reg);
+ vrai.CalculateWeightAndHint(li);
+ DEBUG(dbgs() << " new interval " << mri_.getRegClass(li.reg)->getName()
+ << ":" << li << '\n');
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Loop Splitting
+//===----------------------------------------------------------------------===//
+
+bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) {
+ SplitAnalysis::LoopBlocks Blocks;
+ sa_.getLoopBlocks(Loop, Blocks);
+
+ // Break critical edges as needed.
+ SplitAnalysis::BlockPtrSet CriticalExits;
+ sa_.getCriticalExits(Blocks, CriticalExits);
+ assert(CriticalExits.empty() && "Cannot break critical exits yet");
+
+ // Create new live interval for the loop.
+ openIntv();
+
+ // Insert copies in the predecessors.
+ for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(),
+ E = Blocks.Preds.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
+ enterIntvAtEnd(MBB, *Loop->getHeader());
+ }
+
+ // Switch all loop blocks.
+ for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(),
+ E = Blocks.Loop.end(); I != E; ++I)
+ useIntv(**I);
+
+ // Insert back copies in the exit blocks.
+ for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(),
+ E = Blocks.Exits.end(); I != E; ++I) {
+ MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
+ leaveIntvAtTop(MBB);
+ }
+
+ // Done.
+ closeIntv();
+ rewrite();
+ return dupli_;
+}
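+
+// An illustrative sketch, not part of this patch: how a register allocator
+// might drive a loop split. The names sa, lis, vrm, and curli are assumed to
+// come from the surrounding pass:
+//
+//   SmallVector<LiveInterval*, 4> newIntervals;
+//   sa.analyze(&curli);
+//   if (const MachineLoop *Loop = sa.getBestSplitLoop())
+//     SplitEditor(sa, lis, vrm, newIntervals).splitAroundLoop(Loop);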
+
+
+//===----------------------------------------------------------------------===//
+// Single Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// splitSingleBlocks - Split curli into a separate live interval inside each
+/// basic block in Blocks. Return true if curli has been completely replaced,
+/// false if curli is still intact, and needs to be spilled or split further.
+bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+ DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
+ // Determine the first and last instruction using curli in each block.
+ typedef std::pair<SlotIndex,SlotIndex> IndexPair;
+ typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap;
+ IndexPairMap MBBRange;
+ for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
+ E = sa_.usingInstrs_.end(); I != E; ++I) {
+ const MachineBasicBlock *MBB = (*I)->getParent();
+ if (!Blocks.count(MBB))
+ continue;
+ SlotIndex Idx = lis_.getInstructionIndex(*I);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I);
+ IndexPair &IP = MBBRange[MBB];
+ if (!IP.first.isValid() || Idx < IP.first)
+ IP.first = Idx;
+ if (!IP.second.isValid() || Idx > IP.second)
+ IP.second = Idx;
+ }
+
+ // Create a new interval for each block.
+ for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
+ E = Blocks.end(); I != E; ++I) {
+ IndexPair &IP = MBBRange[*I];
+ DEBUG(dbgs() << " splitting for BB#" << (*I)->getNumber() << ": ["
+ << IP.first << ';' << IP.second << ")\n");
+ assert(IP.first.isValid() && IP.second.isValid());
+
+ openIntv();
+ enterIntvBefore(IP.first);
+ useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
+ leaveIntvAfter(IP.second);
+ closeIntv();
+ }
+ rewrite();
+ return dupli_;
+}
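+
+// An illustrative worked example, not part of this patch: a block whose first
+// and last curli uses sit at indices 120 and 164 produces the call sequence
+// openIntv(), enterIntvBefore(120), useIntv(120, boundary of 164),
+// leaveIntvAfter(164), closeIntv(), giving that block a private interval.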
+
+
+//===----------------------------------------------------------------------===//
+// Sub Block Splitting
+//===----------------------------------------------------------------------===//
+
+/// getBlockForInsideSplit - If curli is contained inside a single basic block,
+/// and it would pay to subdivide the interval inside that block, return it.
+/// Otherwise return NULL. The returned block can be passed to
+/// SplitEditor::splitInsideBlock.
+const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
+ // The interval must be exclusive to one block.
+ if (usingBlocks_.size() != 1)
+ return 0;
+  // Don't do this for fewer than 4 instructions. We want to be sure that
+ // splitting actually reduces the instruction count per interval.
+ if (usingInstrs_.size() < 4)
+ return 0;
+ return usingBlocks_.begin()->first;
+}
+
+/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
+/// true if curli has been completely replaced, false if curli is still
+/// intact, and needs to be spilled or split further.
+bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+ SmallVector<SlotIndex, 32> Uses;
+ Uses.reserve(sa_.usingInstrs_.size());
+ for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
+ E = sa_.usingInstrs_.end(); I != E; ++I)
+ if ((*I)->getParent() == MBB)
+ Uses.push_back(lis_.getInstructionIndex(*I));
+ DEBUG(dbgs() << " splitInsideBlock BB#" << MBB->getNumber() << " for "
+ << Uses.size() << " instructions.\n");
+ assert(Uses.size() >= 3 && "Need at least 3 instructions");
+ array_pod_sort(Uses.begin(), Uses.end());
+
+ // Simple algorithm: Find the largest gap between uses as determined by slot
+ // indices. Create new intervals for instructions before the gap and after the
+ // gap.
+ unsigned bestPos = 0;
+ int bestGap = 0;
+ DEBUG(dbgs() << " dist (" << Uses[0]);
+ for (unsigned i = 1, e = Uses.size(); i != e; ++i) {
+ int g = Uses[i-1].distance(Uses[i]);
+ DEBUG(dbgs() << ") -" << g << "- (" << Uses[i]);
+ if (g > bestGap)
+ bestPos = i, bestGap = g;
+ }
+ DEBUG(dbgs() << "), best: -" << bestGap << "-\n");
+
+ // bestPos points to the first use after the best gap.
+ assert(bestPos > 0 && "Invalid gap");
+
+ // FIXME: Don't create intervals for low densities.
+
+ // First interval before the gap. Don't create single-instr intervals.
+ if (bestPos > 1) {
+ openIntv();
+ enterIntvBefore(Uses.front());
+ useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
+ leaveIntvAfter(Uses[bestPos-1]);
+ closeIntv();
+ }
+
+ // Second interval after the gap.
+ if (bestPos < Uses.size()-1) {
+ openIntv();
+ enterIntvBefore(Uses[bestPos]);
+ useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex());
+ leaveIntvAfter(Uses.back());
+ closeIntv();
+ }
+
+ rewrite();
+ return dupli_;
+}
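+
+// An illustrative worked example, not part of this patch: with sorted uses at
+// slot distances 0 -4- 4 -2- 6 -10- 16 -3- 19, the widest gap (10) lies
+// between 6 and 16, so bestPos = 3 and the two new intervals cover uses
+// {0,4,6} and {16,19}, while the gap itself stays in the original interval.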
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
new file mode 100644
index 0000000000000..ddef7461dc3d6
--- /dev/null
+++ b/lib/CodeGen/SplitKit.h
@@ -0,0 +1,321 @@
+//===-------- SplitKit.h - Toolkit for splitting live ranges ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITKIT_H
+#define LLVM_CODEGEN_SPLITKIT_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class LiveInterval;
+class LiveIntervals;
+class MachineInstr;
+class MachineLoop;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class VirtRegMap;
+class VNInfo;
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+ const MachineFunction &mf_;
+ const LiveIntervals &lis_;
+ const MachineLoopInfo &loops_;
+ const TargetInstrInfo &tii_;
+
+  // Instructions using the current register.
+ typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
+ InstrPtrSet usingInstrs_;
+
+ // The number of instructions using curli in each basic block.
+ typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
+ BlockCountMap usingBlocks_;
+
+  // The number of basic blocks using curli in each loop.
+ typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap;
+ LoopCountMap usingLoops_;
+
+private:
+ // Current live interval.
+ const LiveInterval *curli_;
+
+  // Summarize statistics by counting instructions using curli_.
+ void analyzeUses();
+
+ /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
+ /// analyzed.
+ bool canAnalyzeBranch(const MachineBasicBlock *MBB);
+
+public:
+ SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis,
+ const MachineLoopInfo &mli);
+
+ /// analyze - set curli to the specified interval, and analyze how it may be
+ /// split.
+ void analyze(const LiveInterval *li);
+
+ /// removeUse - Update statistics by noting that mi no longer uses curli.
+ void removeUse(const MachineInstr *mi);
+
+ const LiveInterval *getCurLI() { return curli_; }
+
+ /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+ /// new interval.
+ void clear();
+
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+ typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet;
+
+ // Sets of basic blocks surrounding a machine loop.
+ struct LoopBlocks {
+ BlockPtrSet Loop; // Blocks in the loop.
+ BlockPtrSet Preds; // Loop predecessor blocks.
+ BlockPtrSet Exits; // Loop exit blocks.
+
+ void clear() {
+ Loop.clear();
+ Preds.clear();
+ Exits.clear();
+ }
+ };
+
+ // Calculate the block sets surrounding the loop.
+ void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks);
+
+ /// LoopPeripheralUse - how is a variable used in and around a loop?
+ /// Peripheral blocks are the loop predecessors and exit blocks.
+ enum LoopPeripheralUse {
+ ContainedInLoop, // All uses are inside the loop.
+ SinglePeripheral, // At most one instruction per peripheral block.
+ MultiPeripheral, // Multiple instructions in some peripheral blocks.
+ OutsideLoop // Uses outside loop periphery.
+ };
+
+ /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
+ /// and around the Loop.
+ LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&);
+
+ /// getCriticalExits - It may be necessary to partially break critical edges
+ /// leaving the loop if an exit block has phi uses of curli. Collect the exit
+ /// blocks that need special treatment into CriticalExits.
+ void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits);
+
+ /// canSplitCriticalExits - Return true if it is possible to insert new exit
+ /// blocks before the blocks in CriticalExits.
+ bool canSplitCriticalExits(const LoopBlocks &Blocks,
+ BlockPtrSet &CriticalExits);
+
+ /// getBestSplitLoop - Return the loop where curli may best be split to a
+ /// separate register, or NULL.
+ const MachineLoop *getBestSplitLoop();
+
+ /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+ /// having curli split to a new live interval. Return true if Blocks can be
+ /// passed to SplitEditor::splitSingleBlocks.
+ bool getMultiUseBlocks(BlockPtrSet &Blocks);
+
+  /// getBlockForInsideSplit - If curli is contained inside a single basic
+  /// block, and it would pay to subdivide the interval inside that block,
+  /// return it. Otherwise return NULL. The returned block can be passed to
+  /// SplitEditor::splitInsideBlock.
+ const MachineBasicBlock *getBlockForInsideSplit();
+};
+
+
+/// LiveIntervalMap - Map values from a large LiveInterval into a small
+/// interval that is a subset. Insert phi-def values as needed. This class is
+/// used by SplitEditor to create new smaller LiveIntervals.
+///
+/// parentli_ is the larger interval, li_ is the subset interval. Every value
+/// in li_ corresponds to exactly one value in parentli_, and the live range
+/// of the value is contained within the live range of the parentli_ value.
+/// Values in parentli_ may map to any number of li_ values, including 0.
+class LiveIntervalMap {
+ LiveIntervals &lis_;
+
+ // The parent interval is never changed.
+ const LiveInterval &parentli_;
+
+ // The child interval's values are fully contained inside parentli_ values.
+ LiveInterval &li_;
+
+ typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
+
+ // Map parentli_ values to simple values in li_ that are defined at the same
+ // SlotIndex, or NULL for parentli_ values that have complex li_ defs.
+ // Note there is a difference between values mapping to NULL (complex), and
+ // values not present (unknown/unmapped).
+ ValueMap valueMap_;
+
+ // extendTo - Find the last li_ value defined in MBB at or before Idx. The
+ // parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+ // Return the found VNInfo, or NULL.
+ VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx);
+
+ // addSimpleRange - Add a simple range from parentli_ to li_.
+ // ParentVNI must be live in the [Start;End) interval.
+ void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+
+public:
+ LiveIntervalMap(LiveIntervals &lis,
+ const LiveInterval &parentli,
+ LiveInterval &li)
+ : lis_(lis), parentli_(parentli), li_(li) {}
+
+ /// defValue - define a value in li_ from the parentli_ value VNI and Idx.
+ /// Idx does not have to be ParentVNI->def, but it must be contained within
+ /// ParentVNI's live range in parentli_.
+ /// Return the new li_ value.
+ VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is
+ /// assumed that ParentVNI is live at Idx.
+ /// If ParentVNI has not been defined by defValue, it is assumed that
+ /// ParentVNI->def dominates Idx.
+ /// If ParentVNI has been defined by defValue one or more times, a value that
+ /// dominates Idx will be returned. This may require creating extra phi-def
+ /// values and adding live ranges to li_.
+ VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx);
+
+ /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+ /// All needed values whose def is not inside [Start;End) must be defined
+ /// beforehand so mapValue will work.
+ void addRange(SlotIndex Start, SlotIndex End);
+};
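+
+// An illustrative sketch, not part of this patch: typical LiveIntervalMap use.
+// The names lis, Parent, Child, ParentVNI, CopyIdx, and KillIdx are assumed to
+// exist in the caller. The value defined at the copy is registered first so
+// addRange can resolve the mapping later:
+//
+//   LiveIntervalMap LIM(lis, Parent, Child);
+//   LIM.defValue(ParentVNI, CopyIdx);  // Child value defined at the copy.
+//   LIM.addRange(CopyIdx, KillIdx);    // Copy overlapping Parent ranges.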
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is left with leaveIntv*.
+/// - Finish the current interval with closeIntv and repeat from openIntv.
+/// - Rewrite instructions with rewrite().
+///
+class SplitEditor {
+ SplitAnalysis &sa_;
+ LiveIntervals &lis_;
+ VirtRegMap &vrm_;
+ MachineRegisterInfo &mri_;
+ const TargetInstrInfo &tii_;
+
+ /// curli_ - The immutable interval we are currently splitting.
+ const LiveInterval *const curli_;
+
+ /// dupli_ - Created as a copy of curli_, ranges are carved out as new
+ /// intervals get added through openIntv / closeIntv. This is used to avoid
+ /// editing curli_.
+ LiveInterval *dupli_;
+
+ /// Currently open LiveInterval.
+ LiveInterval *openli_;
+
+ /// createInterval - Create a new virtual register and LiveInterval with same
+ /// register class and spill slot as curli.
+ LiveInterval *createInterval();
+
+ /// getDupLI - Ensure dupli is created and return it.
+ LiveInterval *getDupLI();
+
+  /// valueMap_ - Map values in curli to values in openli. These are direct
+  /// 1-1 mappings, and do not include values created by inserted copies.
+ DenseMap<const VNInfo*, VNInfo*> valueMap_;
+
+ /// mapValue - Return the openIntv value that corresponds to the given curli
+ /// value.
+ VNInfo *mapValue(const VNInfo *curliVNI);
+
+ /// A dupli value is live through openIntv.
+ bool liveThrough_;
+
+ /// All the new intervals created for this split are added to intervals_.
+ SmallVectorImpl<LiveInterval*> &intervals_;
+
+ /// The index into intervals_ of the first interval we added. There may be
+ /// others from before we got it.
+ unsigned firstInterval;
+
+  /// insertCopy - Insert a COPY instruction curli -> li. Allocate a new
+  /// value from li defined by the COPY.
+ VNInfo *insertCopy(LiveInterval &LI,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+public:
+ /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+ /// Newly created intervals will be appended to newIntervals.
+ SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+ SmallVectorImpl<LiveInterval*> &newIntervals);
+
+ /// getAnalysis - Get the corresponding analysis.
+ SplitAnalysis &getAnalysis() { return sa_; }
+
+ /// Create a new virtual register and live interval.
+ void openIntv();
+
+ /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
+ /// not live before Idx, a COPY is not inserted.
+ void enterIntvBefore(SlotIndex Idx);
+
+ /// enterIntvAtEnd - Enter openli at the end of MBB.
+ /// PhiMBB is a successor inside openli where a PHI value is created.
+ /// Currently, all entries must share the same PhiMBB.
+ void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB);
+
+ /// useIntv - indicate that all instructions in MBB should use openli.
+ void useIntv(const MachineBasicBlock &MBB);
+
+ /// useIntv - indicate that all instructions in range should use openli.
+ void useIntv(SlotIndex Start, SlotIndex End);
+
+ /// leaveIntvAfter - Leave openli after the instruction at Idx.
+ void leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvAtTop - Leave the interval at the top of MBB.
+ /// Currently, only one value can leave the interval.
+ void leaveIntvAtTop(MachineBasicBlock &MBB);
+
+ /// closeIntv - Indicate that we are done editing the currently open
+ /// LiveInterval, and ranges can be trimmed.
+ void closeIntv();
+
+ /// rewrite - after all the new live ranges have been created, rewrite
+ /// instructions using curli to use the new intervals.
+ void rewrite();
+
+ // ===--- High level methods ---===
+
+ /// splitAroundLoop - Split curli into a separate live interval inside
+ /// the loop. Return true if curli has been completely replaced, false if
+ /// curli is still intact, and needs to be spilled or split further.
+ bool splitAroundLoop(const MachineLoop*);
+
+ /// splitSingleBlocks - Split curli into a separate live interval inside each
+ /// basic block in Blocks. Return true if curli has been completely replaced,
+ /// false if curli is still intact, and needs to be spilled or split further.
+ bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+
+ /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
+ /// true if curli has been completely replaced, false if curli is still
+ /// intact, and needs to be spilled or split further.
+ bool splitInsideBlock(const MachineBasicBlock *);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_SPLITKIT_H
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
new file mode 100644
index 0000000000000..38f3b1f4d35ea
--- /dev/null
+++ b/lib/CodeGen/Splitter.cpp
@@ -0,0 +1,817 @@
+//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsplitter"
+
+#include "Splitter.h"
+
+#include "SimpleRegisterCoalescing.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char LoopSplitter::ID = 0;
+INITIALIZE_PASS(LoopSplitter, "loop-splitting",
+                "Split virtual registers across loop boundaries.",
+                false, false);
+
+namespace llvm {
+
+ class StartSlotComparator {
+ public:
+ StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
+ bool operator()(const MachineBasicBlock *mbb1,
+ const MachineBasicBlock *mbb2) const {
+ return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
+ }
+ private:
+ LiveIntervals &lis;
+ };
+
+ class LoopSplit {
+ public:
+ LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
+ : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Cannot split physical registers.");
+ }
+
+ LiveInterval& getLI() const { return li; }
+
+ MachineLoop& getLoop() const { return loop; }
+
+ bool isValid() const { return valid; }
+
+ bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
+
+ void invalidate() { valid = false; }
+
+ void splitIncoming() { inSplit = true; }
+
+ void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
+
+ void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
+
+ void apply() {
+ assert(valid && "Attempt to apply invalid split.");
+ applyIncoming();
+ applyOutgoing();
+ copyRanges();
+ renameInside();
+ }
+
+ private:
+ LoopSplitter &ls;
+ LiveInterval &li;
+ MachineLoop &loop;
+ bool valid, inSplit;
+ std::set<MachineLoop::Edge> outSplits;
+ std::vector<MachineInstr*> loopInstrs;
+
+ LiveInterval *newLI;
+ std::map<VNInfo*, VNInfo*> vniMap;
+
+ LiveInterval* getNewLI() {
+ if (newLI == 0) {
+ const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
+ unsigned vreg = ls.mri->createVirtualRegister(trc);
+ newLI = &ls.lis->getOrCreateInterval(vreg);
+ }
+ return newLI;
+ }
+
+ VNInfo* getNewVNI(VNInfo *oldVNI) {
+ VNInfo *newVNI = vniMap[oldVNI];
+
+ if (newVNI == 0) {
+ newVNI = getNewLI()->createValueCopy(oldVNI,
+ ls.lis->getVNInfoAllocator());
+ vniMap[oldVNI] = newVNI;
+ }
+
+ return newVNI;
+ }
+
+ void applyIncoming() {
+ if (!inSplit) {
+ return;
+ }
+
+ MachineBasicBlock *preHeader = loop.getLoopPreheader();
+ if (preHeader == 0) {
+ assert(ls.canInsertPreHeader(loop) &&
+ "Can't insert required preheader.");
+ preHeader = &ls.insertPreHeader(loop);
+ }
+
+ LiveRange *preHeaderRange =
+ ls.lis->findExitingRange(li, preHeader);
+ assert(preHeaderRange != 0 && "Range not live into preheader.");
+
+ // Insert the new copy.
+ MachineInstr *copy = BuildMI(*preHeader,
+ preHeader->getFirstTerminator(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(getNewLI()->reg, RegState::Define)
+ .addReg(li.reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ VNInfo *newVal = getNewVNI(preHeaderRange->valno);
+ newVal->def = copyDefIdx;
+ newVal->setCopy(copy);
+ newVal->setIsDefAccurate(true);
+ li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
+
+ getNewLI()->addRange(LiveRange(copyDefIdx,
+ ls.lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
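+
+    // An illustrative sketch, not part of this patch: after applyIncoming the
+    // preheader ends with
+    //   %newLI = COPY %li<kill>
+    // and li's range from the copy to the end of the preheader now belongs to
+    // newLI, so renameInside() can later rename the loop body to newLI.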
+
+ void applyOutgoing() {
+
+ for (std::set<MachineLoop::Edge>::iterator osItr = outSplits.begin(),
+ osEnd = outSplits.end();
+ osItr != osEnd; ++osItr) {
+ MachineLoop::Edge edge = *osItr;
+ MachineBasicBlock *outBlock = edge.second;
+ if (ls.isCriticalEdge(edge)) {
+          assert(ls.canSplitEdge(edge) && "Unsplittable critical edge.");
+ outBlock = &ls.splitEdge(edge, loop);
+ }
+ LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock);
+        assert(outRange != 0 && "No entering range?");
+
+ MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(),
+ DebugLoc(),
+ ls.tii->get(TargetOpcode::COPY))
+ .addReg(li.reg, RegState::Define)
+ .addReg(getNewLI()->reg, RegState::Kill);
+
+ ls.lis->InsertMachineInstrInMaps(copy);
+
+ SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex();
+
+ // Blow away output range definition.
+ outRange->valno->def = ls.lis->getInvalidIndex();
+ outRange->valno->setIsDefAccurate(false);
+ li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
+
+ VNInfo *newVal =
+ getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock),
+ true),
+ 0, false, ls.lis->getVNInfoAllocator());
+
+ getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
+ copyDefIdx, newVal));
+
+ }
+ }
+
+ void copyRange(LiveRange &lr) {
+ std::pair<bool, LoopSplitter::SlotPair> lsr =
+ ls.getLoopSubRange(lr, loop);
+
+ if (!lsr.first)
+ return;
+
+ LiveRange loopRange(lsr.second.first, lsr.second.second,
+ getNewVNI(lr.valno));
+
+ li.removeRange(loopRange.start, loopRange.end, true);
+
+ getNewLI()->addRange(loopRange);
+ }
+
+ void copyRanges() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr);
+ if (instr.modifiesRegister(li.reg, 0)) {
+ LiveRange *defRange =
+ li.getLiveRangeContaining(instrIdx.getDefIndex());
+ if (defRange != 0) // May have caught this already.
+ copyRange(*defRange);
+ }
+ if (instr.readsRegister(li.reg, 0)) {
+ LiveRange *useRange =
+ li.getLiveRangeContaining(instrIdx.getUseIndex());
+ if (useRange != 0) { // May have caught this already.
+ copyRange(*useRange);
+ }
+ }
+ }
+
+ for (MachineLoop::block_iterator bbItr = loop.block_begin(),
+ bbEnd = loop.block_end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock &loopBlock = **bbItr;
+ LiveRange *enteringRange =
+ ls.lis->findEnteringRange(li, &loopBlock);
+ if (enteringRange != 0) {
+ copyRange(*enteringRange);
+ }
+ }
+ }
+
+ void renameInside() {
+ for (std::vector<MachineInstr*>::iterator iItr = loopInstrs.begin(),
+ iEnd = loopInstrs.end();
+ iItr != iEnd; ++iItr) {
+ MachineInstr &instr = **iItr;
+ for (unsigned i = 0; i < instr.getNumOperands(); ++i) {
+ MachineOperand &mop = instr.getOperand(i);
+ if (mop.isReg() && mop.getReg() == li.reg) {
+ mop.setReg(getNewLI()->reg);
+ }
+ }
+ }
+ }
+
+ };
+
+ void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<MachineDominatorTree>();
+ au.addPreserved<MachineDominatorTree>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ au.addPreserved<RegisterCoalescer>();
+ au.addPreserved<CalculateSpillWeights>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ au.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(au);
+ }
+
+ bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) {
+
+ mf = &fn;
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ sis = &getAnalysis<SlotIndexes>();
+ lis = &getAnalysis<LiveIntervals>();
+ mli = &getAnalysis<MachineLoopInfo>();
+ mdt = &getAnalysis<MachineDominatorTree>();
+
+ fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." +
+ mf->getFunction()->getName().str();
+
+    DEBUG(dbgs() << "Splitting " << mf->getFunction()->getName() << ".\n");
+
+ dumpOddTerminators();
+
+ // Setup initial intervals.
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval *li = liItr->second;
+
+ if (TargetRegisterInfo::isVirtualRegister(li->reg) &&
+ !lis->intervalIsInOneMBB(*li)) {
+ intervals.push_back(li);
+ }
+ }
+
+ processIntervals();
+
+ intervals.clear();
+
+    dumpOddTerminators();
+
+ return false;
+ }
+
+ void LoopSplitter::releaseMemory() {
+ fqn.clear();
+ intervals.clear();
+ loopRangeMap.clear();
+ }
+
+ void LoopSplitter::dumpOddTerminators() {
+ for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end();
+ bbItr != bbEnd; ++bbItr) {
+ MachineBasicBlock *mbb = &*bbItr;
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ if (tii->AnalyzeBranch(*mbb, a, b, c)) {
+ dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n";
+ dbgs() << " Terminators:\n";
+ for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end();
+ iItr != iEnd; ++iItr) {
+          MachineInstr *instr = &*iItr;
+          dbgs() << "    " << *instr;
+ }
+ dbgs() << "\n Listed successors: [ ";
+ for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end();
+ sItr != sEnd; ++sItr) {
+ MachineBasicBlock *succMBB = *sItr;
+ dbgs() << succMBB->getNumber() << " ";
+ }
+ dbgs() << "]\n\n";
+ }
+ }
+ }
+
+ void LoopSplitter::dumpLoopInfo(MachineLoop &loop) {
+ MachineBasicBlock &headerBlock = *loop.getHeader();
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ loop.getExitEdges(exitEdges);
+
+ dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ ";
+ for (std::vector<MachineBasicBlock*>::const_iterator
+ subBlockItr = loop.getBlocks().begin(),
+ subBlockEnd = loop.getBlocks().end();
+ subBlockItr != subBlockEnd; ++subBlockItr) {
+ MachineBasicBlock &subBlock = **subBlockItr;
+ dbgs() << "BB#" << subBlock.getNumber() << " ";
+ }
+ dbgs() << "], Exit edges: [ ";
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge &exitEdge = *exitEdgeItr;
+ dbgs() << "(MBB#" << exitEdge.first->getNumber()
+ << ", MBB#" << exitEdge.second->getNumber() << ") ";
+ }
+ dbgs() << "], Sub-Loop Headers: [ ";
+ for (MachineLoop::iterator subLoopItr = loop.begin(),
+ subLoopEnd = loop.end();
+ subLoopItr != subLoopEnd; ++subLoopItr) {
+ MachineLoop &subLoop = **subLoopItr;
+ MachineBasicBlock &subLoopBlock = *subLoop.getHeader();
+ dbgs() << "BB#" << subLoopBlock.getNumber() << " ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) {
+ mbb.updateTerminator();
+
+ for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end();
+ miItr != miEnd; ++miItr) {
+ if (lis->isNotInMIMap(miItr)) {
+ lis->InsertMachineInstrInMaps(miItr);
+ }
+ }
+ }
+
+ bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) {
+ MachineBasicBlock *header = loop.getHeader();
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+
+ for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(),
+ pbEnd = header->pred_end();
+ pbItr != pbEnd; ++pbItr) {
+ MachineBasicBlock *predBlock = *pbItr;
+      if (tii->AnalyzeBranch(*predBlock, a, b, c)) {
+ return false;
+ }
+ }
+
+ MachineFunction::iterator headerItr(header);
+ if (headerItr == mf->begin())
+ return true;
+ MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr);
+ assert(headerLayoutPred != 0 && "Header should have layout pred.");
+
+ return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) {
+ assert(loop.getLoopPreheader() == 0 && "Loop already has preheader.");
+
+ MachineBasicBlock &header = *loop.getHeader();
+
+ // Save the preds - we'll need to update them once we insert the preheader.
+ typedef std::set<MachineBasicBlock*> HeaderPreds;
+ HeaderPreds headerPreds;
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (!loop.contains(*predItr))
+ headerPreds.insert(*predItr);
+ }
+
+ assert(!headerPreds.empty() && "No predecessors for header?");
+
+ MachineBasicBlock *preHeader =
+ mf->CreateMachineBasicBlock(header.getBasicBlock());
+
+ assert(preHeader != 0 && "Failed to create pre-header.");
+
+ mf->insert(header, preHeader);
+
+ for (HeaderPreds::iterator hpItr = headerPreds.begin(),
+ hpEnd = headerPreds.end();
+ hpItr != hpEnd; ++hpItr) {
+ assert(*hpItr != 0 && "How'd a null predecessor get into this set?");
+ MachineBasicBlock &hp = **hpItr;
+ hp.ReplaceUsesOfBlockWith(&header, preHeader);
+ }
+ preHeader->addSuccessor(&header);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(preHeader));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(preHeader);
+
+ if (MachineLoop *parentLoop = loop.getParentLoop()) {
+ assert(parentLoop->getHeader() != loop.getHeader() &&
+ "Parent loop has same header?");
+ parentLoop->addBasicBlockToLoop(preHeader, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (parentLoop != 0) {
+ loopRangeMap.erase(parentLoop);
+ parentLoop = parentLoop->getParentLoop();
+ }
+ }
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+
+ // Is this safe for physregs?
+ // TargetRegisterInfo::isPhysicalRegister(li.reg) ||
+ if (!lis->isLiveInToMBB(li, &header))
+ continue;
+
+ if (lis->isLiveInToMBB(li, preHeader)) {
+ assert(lis->isLiveOutOfMBB(li, preHeader) &&
+ "Range terminates in newly added preheader?");
+ continue;
+ }
+
+ bool insertRange = false;
+
+ for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(),
+ predEnd = preHeader->pred_end();
+ predItr != predEnd; ++predItr) {
+ MachineBasicBlock *predMBB = *predItr;
+ if (lis->isLiveOutOfMBB(li, predMBB)) {
+ insertRange = true;
+ break;
+ }
+ }
+
+ if (!insertRange)
+ continue;
+
+ VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader),
+ 0, false, lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
+ lis->getMBBEndIdx(preHeader),
+ newVal));
+ }
+
+ return *preHeader;
+ }
+
+ bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) {
+    assert(edge.first->succ_size() > 1 && "Nonsensical edge.");
+    return edge.second->pred_size() > 1;
+ }
+
+ bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) {
+ MachineFunction::iterator outBlockItr(edge.second);
+ if (outBlockItr == mf->begin())
+ return true;
+ MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr);
+ assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin.");
+ MachineBasicBlock *a = 0, *b = 0;
+ SmallVector<MachineOperand, 4> c;
+ return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) &&
+ !tii->AnalyzeBranch(*edge.first, a, b, c));
+ }
+
+ MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge,
+ MachineLoop &loop) {
+
+ MachineBasicBlock &inBlock = *edge.first;
+ MachineBasicBlock &outBlock = *edge.second;
+
+ assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) &&
+ "Splitting non-critical edge?");
+
+ MachineBasicBlock *splitBlock =
+ mf->CreateMachineBasicBlock();
+
+ assert(splitBlock != 0 && "Failed to create split block.");
+
+ mf->insert(&outBlock, splitBlock);
+
+ inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock);
+ splitBlock->addSuccessor(&outBlock);
+
+ MachineBasicBlock *oldLayoutPred =
+ llvm::prior(MachineFunction::iterator(splitBlock));
+ if (oldLayoutPred != 0) {
+ updateTerminators(*oldLayoutPred);
+ }
+
+ lis->InsertMBBInMaps(splitBlock);
+
+ loopRangeMap.erase(&loop);
+
+ MachineLoop *splitParentLoop = loop.getParentLoop();
+ while (splitParentLoop != 0 &&
+ !splitParentLoop->contains(&outBlock)) {
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+
+ if (splitParentLoop != 0) {
+ assert(splitParentLoop->contains(&loop) &&
+ "Split-block parent doesn't contain original loop?");
+ splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase());
+
+ // Invalidate all parent loop ranges.
+ while (splitParentLoop != 0) {
+ loopRangeMap.erase(splitParentLoop);
+ splitParentLoop = splitParentLoop->getParentLoop();
+ }
+ }
+
+ for (LiveIntervals::iterator liItr = lis->begin(),
+ liEnd = lis->end();
+ liItr != liEnd; ++liItr) {
+ LiveInterval &li = *liItr->second;
+ bool intersects = lis->isLiveOutOfMBB(li, &inBlock) &&
+ lis->isLiveInToMBB(li, &outBlock);
+ if (lis->isLiveInToMBB(li, splitBlock)) {
+ if (!intersects) {
+ li.removeRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock), true);
+ }
+ } else if (intersects) {
+ VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock),
+ 0, false, lis->getVNInfoAllocator());
+ li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
+ lis->getMBBEndIdx(splitBlock),
+ newVal));
+ }
+ }
+
+ return *splitBlock;
+ }
+
+ LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
+ typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
+ LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
+ if (lrItr == loopRangeMap.end()) {
+ LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
+ std::copy(loop.block_begin(), loop.block_end(),
+ std::inserter(loopMBBs, loopMBBs.begin()));
+
+ assert(!loopMBBs.empty() && "No blocks in loop?");
+
+ LoopRanges &loopRanges = loopRangeMap[&loop];
+ assert(loopRanges.empty() && "Loop encountered but not processed?");
+ SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
+ loopRanges.push_back(
+ std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
+ lis->getInvalidIndex()));
+ for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
+ curBlockEnd = loopMBBs.end();
+ curBlockItr != curBlockEnd; ++curBlockItr) {
+ SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
+ if (newStart != oldEnd) {
+ loopRanges.back().second = oldEnd;
+ loopRanges.push_back(std::make_pair(newStart,
+ lis->getInvalidIndex()));
+ }
+ oldEnd = lis->getMBBEndIdx(*curBlockItr);
+ }
+
+ loopRanges.back().second =
+ lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
+
+ return loopRanges;
+ }
+ return lrItr->second;
+ }
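+
+  // An illustrative worked example, not part of this patch: blocks sorted by
+  // start index are coalesced whenever one block's end meets the next block's
+  // start, e.g. [0;20) [20;36) [52;80) collapses to the two loop ranges
+  // [0;36) and [52;80).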
+
+ std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
+ const LiveRange &lr,
+ MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+ LoopRanges::iterator lrItr = loopRanges.begin(),
+ lrEnd = loopRanges.end();
+ while (lrItr != lrEnd && lr.start >= lrItr->second) {
+ ++lrItr;
+ }
+
+ if (lrItr == lrEnd) {
+ SlotIndex invalid = lis->getInvalidIndex();
+ return std::make_pair(false, SlotPair(invalid, invalid));
+ }
+
+ SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
+ SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
+
+ return std::make_pair(true, SlotPair(srStart, srEnd));
+ }
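+
+  // An illustrative worked example, not part of this patch: clamping the live
+  // range [10;60) against the loop ranges [0;36) [52;80) returns
+  // (true, [10;36)); only the first overlapping loop range is considered.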
+
+ void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
+ LoopRanges &loopRanges = getLoopRanges(loop);
+    dbgs() << "For loop MBB#" << loop.getHeader()->getNumber()
+           << ", subranges are: [ ";
+    for (LoopRanges::iterator lrItr = loopRanges.begin(),
+                              lrEnd = loopRanges.end();
+ lrItr != lrEnd; ++lrItr) {
+ dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
+ }
+ dbgs() << "]\n";
+ }
+
+ void LoopSplitter::processHeader(LoopSplit &split) {
+ MachineBasicBlock &header = *split.getLoop().getHeader();
+
+ if (!lis->isLiveInToMBB(split.getLI(), &header))
+ return; // Not live in, but nothing wrong so far.
+
+ MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
+ if (!preHeader) {
+
+ if (!canInsertPreHeader(split.getLoop())) {
+ split.invalidate();
+ return; // Couldn't insert a pre-header. Bail on this interval.
+ }
+
+ for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+ predEnd = header.pred_end();
+ predItr != predEnd; ++predItr) {
+ if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
+ split.splitIncoming();
+ break;
+ }
+ }
+ } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
+ split.splitIncoming();
+ }
+ }
+
+ void LoopSplitter::processLoopExits(LoopSplit &split) {
+ typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+ ExitEdgesList exitEdges;
+ split.getLoop().getExitEdges(exitEdges);
+
+ for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+ exitEdgeEnd = exitEdges.end();
+ exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+ MachineLoop::Edge exitEdge = *exitEdgeItr;
+
+ LiveRange *outRange =
+ split.getLI().getLiveRangeContaining(lis->getMBBStartIdx(exitEdge.second));
+
+ if (outRange != 0) {
+ if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
+ split.invalidate();
+ return;
+ }
+
+ split.splitOutgoing(exitEdge);
+ }
+ }
+ }
+
+ void LoopSplitter::processLoopUses(LoopSplit &split) {
+ std::set<MachineInstr*> processed;
+
+ for (MachineRegisterInfo::reg_iterator
+ rItr = mri->reg_begin(split.getLI().reg),
+ rEnd = mri->reg_end();
+ rItr != rEnd; ++rItr) {
+ MachineInstr &instr = *rItr;
+ if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
+ split.addLoopInstr(&instr);
+ processed.insert(&instr);
+ }
+ }
+ }
+
+ bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
+ assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+ "Attempt to split physical register.");
+
+ LoopSplit split(*this, li, loop);
+ processHeader(split);
+ if (split.isValid())
+ processLoopExits(split);
+ if (split.isValid())
+ processLoopUses(split);
+ if (split.isValid() /* && split.isWorthwhile() */) {
+ split.apply();
+ DEBUG(dbgs() << "Success.\n");
+ return true;
+ }
+ DEBUG(dbgs() << "Failed.\n");
+ return false;
+ }
+
+ void LoopSplitter::processInterval(LiveInterval &li) {
+ std::deque<MachineLoop*> loops;
+ std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+
+ while (!loops.empty()) {
+ MachineLoop &loop = *loops.front();
+ loops.pop_front();
+ DEBUG(
+ dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
+ << loop.getHeader()->getNumber() << " ";
+ );
+ if (!splitOverLoop(li, loop)) {
+ // Couldn't split over outer loop, schedule sub-loops to be checked.
+ std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+ }
+ }
+ }
+
+ void LoopSplitter::processIntervals() {
+ while (!intervals.empty()) {
+ LiveInterval &li = *intervals.front();
+ intervals.pop_front();
+
+ assert(!lis->intervalIsInOneMBB(li) &&
+             "Single-block interval in process worklist.");
+
+ processInterval(li);
+ }
+ }
+
+}
diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h
new file mode 100644
index 0000000000000..a726a7b834fbe
--- /dev/null
+++ b/lib/CodeGen/Splitter.h
@@ -0,0 +1,99 @@
+//===-- llvm/CodeGen/Splitter.h - Splitter -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITTER_H
+#define LLVM_CODEGEN_SPLITTER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+#include <deque>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+ class LiveInterval;
+ class LiveIntervals;
+ struct LiveRange;
+ class LoopSplit;
+ class MachineDominatorTree;
+ class MachineRegisterInfo;
+ class SlotIndexes;
+ class TargetInstrInfo;
+ class VNInfo;
+
+ class LoopSplitter : public MachineFunctionPass {
+ friend class LoopSplit;
+ public:
+ static char ID;
+
+ LoopSplitter() : MachineFunctionPass(ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+ virtual bool runOnMachineFunction(MachineFunction &fn);
+
+ virtual void releaseMemory();
+
+
+ private:
+
+ MachineFunction *mf;
+ LiveIntervals *lis;
+ MachineLoopInfo *mli;
+ MachineRegisterInfo *mri;
+ MachineDominatorTree *mdt;
+ SlotIndexes *sis;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+
+ std::string fqn;
+ std::deque<LiveInterval*> intervals;
+
+ typedef std::pair<SlotIndex, SlotIndex> SlotPair;
+ typedef std::vector<SlotPair> LoopRanges;
+ typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
+ LoopRangeMap loopRangeMap;
+
+ void dumpLoopInfo(MachineLoop &loop);
+
+ void dumpOddTerminators();
+
+ void updateTerminators(MachineBasicBlock &mbb);
+
+ bool canInsertPreHeader(MachineLoop &loop);
+ MachineBasicBlock& insertPreHeader(MachineLoop &loop);
+
+ bool isCriticalEdge(MachineLoop::Edge &edge);
+ bool canSplitEdge(MachineLoop::Edge &edge);
+ MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
+
+ LoopRanges& getLoopRanges(MachineLoop &loop);
+ std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
+ MachineLoop &loop);
+
+ void dumpLoopRanges(MachineLoop &loop);
+
+ void processHeader(LoopSplit &split);
+ void processLoopExits(LoopSplit &split);
+ void processLoopUses(LoopSplit &split);
+
+ bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
+
+ void processInterval(LiveInterval &li);
+
+ void processIntervals();
+ };
+
+}
+
+#endif
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index ca5c28ce010cb..9f51778da7562 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -62,17 +62,17 @@ namespace {
bool RequiresStackProtector() const;
public:
static char ID; // Pass identification, replacement for typeid.
- StackProtector() : FunctionPass(&ID), TLI(0) {}
+ StackProtector() : FunctionPass(ID), TLI(0) {}
StackProtector(const TargetLowering *tli)
- : FunctionPass(&ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {}
virtual bool runOnFunction(Function &Fn);
};
} // end anonymous namespace
char StackProtector::ID = 0;
-static RegisterPass<StackProtector>
-X("stack-protector", "Insert stack protectors");
+INITIALIZE_PASS(StackProtector, "stack-protector",
+ "Insert stack protectors", false, false);
FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
return new StackProtector(tli);
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index eff3c33e3daa0..8d57ae95dde2a 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -95,9 +95,9 @@ namespace {
public:
static char ID; // Pass identification
StackSlotColoring() :
- MachineFunctionPass(&ID), ColorWithRegs(false), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {}
StackSlotColoring(bool RegColor) :
- MachineFunctionPass(&ID), ColorWithRegs(RegColor), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -119,7 +119,6 @@ namespace {
private:
void InitializeSlots();
- bool CheckForSetJmpCall(const MachineFunction &MF) const;
void ScanForSpillSlotRefs(MachineFunction &MF);
bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
@@ -146,8 +145,8 @@ namespace {
char StackSlotColoring::ID = 0;
-static RegisterPass<StackSlotColoring>
-X("stack-slot-coloring", "Stack Slot Coloring");
+INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false);
FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
return new StackSlotColoring(RegColor);
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 59315cf67282b..894dbfa28bac7 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -39,7 +39,7 @@ using namespace llvm;
namespace {
struct StrongPHIElimination : public MachineFunctionPass {
static char ID; // Pass identification, replacement for typeid
- StrongPHIElimination() : MachineFunctionPass(&ID) {}
+ StrongPHIElimination() : MachineFunctionPass(ID) {}
// Waiting stores, for each MBB, the set of copies that need to
// be inserted into that MBB
@@ -150,11 +150,10 @@ namespace {
}
char StrongPHIElimination::ID = 0;
-static RegisterPass<StrongPHIElimination>
-X("strong-phi-node-elimination",
- "Eliminate PHI nodes for register allocation, intelligently");
+INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false);
-const PassInfo *const llvm::StrongPHIEliminationID = &X;
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
/// of the given MachineFunction. These numbers are then used in other parts
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 075db803bd231..a815b364d54e7 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -69,7 +69,7 @@ namespace {
public:
static char ID;
explicit TailDuplicatePass(bool PreRA) :
- MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "Tail Duplication"; }
@@ -254,14 +254,15 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
// SSA form.
for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
MachineInstr *Copy = Copies[i];
- unsigned Src, Dst, SrcSR, DstSR;
- if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) {
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
- if (++UI == MRI->use_end()) {
- // Copy is the only use. Do trivial copy propagation here.
- MRI->replaceRegWith(Dst, Src);
- Copy->eraseFromParent();
- }
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+ if (++UI == MRI->use_end()) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
}
}
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index cdacb98e0e883..6e4a0d837ecd3 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -178,19 +178,6 @@ MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
return MF.CloneMachineInstr(Orig);
}
-unsigned
-TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
- unsigned FnSize = 0;
- for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
- I != E; ++I)
- FnSize += GetInstSizeInBytes(I);
- }
- return FnSize;
-}
-
// If the COPY instruction in MI can be folded to a stack operation, return
// the register class to use.
static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index a80cfc4b256fe..f1e10eec724c6 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -519,11 +519,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
ConstTextCoalSection
= getContext().getMachOSection("__TEXT", "__const_coal",
MCSectionMachO::S_COALESCED,
- SectionKind::getText());
- ConstDataCoalSection
- = getContext().getMachOSection("__DATA","__const_coal",
- MCSectionMachO::S_COALESCED,
- SectionKind::getText());
+ SectionKind::getReadOnly());
ConstDataSection // .const_data
= getContext().getMachOSection("__DATA", "__const", 0,
SectionKind::getReadOnlyWithRel());
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 564914373bb56..78989c567e42f 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -138,7 +138,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -159,10 +159,10 @@ namespace {
}
char TwoAddressInstructionPass::ID = 0;
-static RegisterPass<TwoAddressInstructionPass>
-X("twoaddressinstruction", "Two-Address instruction pass");
+INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false);
-const PassInfo *const llvm::TwoAddressInstructionPassID = &X;
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
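
Both the removed RegisterPass<> object and the INITIALIZE_PASS macro reduce to the same static-registration idiom: a namespace-scope object whose constructor runs before main() and records the pass under its command-line name. A stripped-down sketch of that idiom (illustrative names, not the real PassRegistry API):

    #include <map>
    #include <string>

    // Toy registry keyed by the pass's command-line argument.
    static std::map<std::string, std::string> &passRegistry() {
      static std::map<std::string, std::string> M;
      return M;
    }

    struct RegisterEntry {
      RegisterEntry(const char *Arg, const char *Desc) {
        passRegistry()[Arg] = Desc;  // runs during static initialization
      }
    };

    // Roughly what INITIALIZE_PASS(TwoAddressInstructionPass, ...) emits:
    static RegisterEntry X("twoaddressinstruction",
                           "Two-Address instruction pass");
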
/// Sink3AddrInstruction - A two-address instruction has been converted to a
/// three-address instruction to avoid clobbering a register. Try to sink it
@@ -380,26 +380,18 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
bool &IsSrcPhys, bool &IsDstPhys) {
SrcReg = 0;
DstReg = 0;
- unsigned SrcSubIdx, DstSubIdx;
- if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- if (MI.isCopy()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- } else if (MI.isInsertSubreg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- } else if (MI.isSubregToReg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- }
- }
+ if (MI.isCopy()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ } else if (MI.isInsertSubreg() || MI.isSubregToReg()) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ } else
+ return false;
- if (DstReg) {
- IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- return true;
- }
- return false;
+ IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ return true;
}
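
The rewritten isCopyToReg drops the isMoveInstr query and reads operands positionally: COPY carries its source at operand 1, while INSERT_SUBREG and SUBREG_TO_REG carry it at operand 2. A compact sketch of that decoding shape (toy instruction type, not MachineInstr):

    enum Opcode { Copy, InsertSubreg, SubregToReg, Other };

    struct Inst {
      Opcode Op;
      unsigned Operand[4];  // operand 0 is always the destination
    };

    // Early-return false for non-copy-like opcodes; the success path is
    // then a straight fall-through, as in the rewritten function above.
    bool decodeCopy(const Inst &I, unsigned &SrcReg, unsigned &DstReg) {
      switch (I.Op) {
      case Copy:
        DstReg = I.Operand[0]; SrcReg = I.Operand[1]; return true;
      case InsertSubreg:
      case SubregToReg:
        DstReg = I.Operand[0]; SrcReg = I.Operand[2]; return true;
      default:
        return false;
      }
    }
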
/// isKilled - Test if the given register value, which is used by the given
@@ -1454,7 +1446,17 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
//
// If the REG_SEQUENCE doesn't kill its source, keeping live variables
// correctly up to date becomes very difficult. Insert a copy.
- //
+
+ // Defer any kill flag to the last operand using SrcReg. Otherwise, we
+ // might insert a COPY that uses SrcReg after it was killed.
+ if (isKill)
+ for (unsigned j = i + 2; j < e; j += 2)
+ if (MI->getOperand(j).getReg() == SrcReg) {
+ MI->getOperand(j).setIsKill();
+ isKill = false;
+ break;
+ }
+
MachineBasicBlock::iterator InsertLoc = MI;
MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
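
The kill-flag fix above scans forward through the remaining REG_SEQUENCE operand pairs and, on finding a later read of SrcReg, moves the kill there so the inserted COPY never reads a register past its kill point. The same move on a flat operand list (hypothetical Operand type):

    #include <vector>

    struct Operand { unsigned Reg; bool IsKill; };

    // If Ops[Idx] kills Reg but a later operand also reads Reg, transfer
    // the kill flag forward so nothing reads Reg after its kill point.
    void deferKill(std::vector<Operand> &Ops, unsigned Idx) {
      if (!Ops[Idx].IsKill)
        return;
      for (unsigned J = Idx + 1; J != Ops.size(); ++J)
        if (Ops[J].Reg == Ops[Idx].Reg) {
          Ops[J].IsKill = true;
          Ops[Idx].IsKill = false;
          return;
        }
    }
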
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 7b338126d475f..6dd333358bc44 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -43,7 +43,7 @@ namespace {
virtual bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- UnreachableBlockElim() : FunctionPass(&ID) {}
+ UnreachableBlockElim() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<ProfileInfo>();
@@ -51,8 +51,8 @@ namespace {
};
}
char UnreachableBlockElim::ID = 0;
-static RegisterPass<UnreachableBlockElim>
-X("unreachableblockelim", "Remove unreachable blocks from the CFG");
+INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
+ "Remove unreachable blocks from the CFG", false, false);
FunctionPass *llvm::createUnreachableBlockEliminationPass() {
return new UnreachableBlockElim();
@@ -100,16 +100,15 @@ namespace {
MachineModuleInfo *MMI;
public:
static char ID; // Pass identification, replacement for typeid
- UnreachableMachineBlockElim() : MachineFunctionPass(&ID) {}
+ UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
};
}
char UnreachableMachineBlockElim::ID = 0;
-static RegisterPass<UnreachableMachineBlockElim>
-Y("unreachable-mbb-elimination",
- "Remove unreachable machine basic blocks");
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+ "Remove unreachable machine basic blocks", false, false);
-const PassInfo *const llvm::UnreachableMachineBlockElimID = &Y;
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineLoopInfo>();
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index ed0269695dfe0..20ffcffa70d35 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -48,8 +48,7 @@ STATISTIC(NumSpills , "Number of register spills");
char VirtRegMap::ID = 0;
-static RegisterPass<VirtRegMap>
-X("virtregmap", "Virtual Register Map");
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false);
bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index a5599f68b64e6..8b6082d181932 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -139,7 +139,7 @@ namespace llvm {
public:
static char ID;
- VirtRegMap() : MachineFunctionPass(&ID), Virt2PhysMap(NO_PHYS_REG),
+ VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG),
Virt2StackSlotMap(NO_STACK_SLOT),
Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0),
Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL),
@@ -152,6 +152,11 @@ namespace llvm {
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunction &getMachineFunction() const {
+ assert(MF && "getMachineFunction called before runOnMachineFunction");
+ return *MF;
+ }
+
void grow();
/// @brief returns true if the specified virtual register is
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 57a1500e6e9da..240d28cf30114 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -67,23 +67,16 @@ VirtRegRewriter::~VirtRegRewriter() {}
/// Note that operands may be added, so the MO reference is no longer valid.
static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
const TargetRegisterInfo &TRI) {
- if (unsigned SubIdx = MO.getSubReg()) {
- // Insert the physical subreg and reset the subreg field.
- MO.setReg(TRI.getSubReg(Reg, SubIdx));
- MO.setSubReg(0);
-
- // Any def, dead, and kill flags apply to the full virtual register, so they
- // also apply to the full physical register. Add imp-def/dead and imp-kill
- // as needed.
+ if (MO.getSubReg()) {
+ MO.substPhysReg(Reg, TRI);
+
+ // Any kill flags apply to the full virtual register, so they also apply to
+ // the full physical register.
+ // We assume that partial defs have already been decorated with a super-reg
+ // <imp-def> operand by LiveIntervals.
MachineInstr &MI = *MO.getParent();
- if (MO.isDef())
- if (MO.isDead())
- MI.addRegisterDead(Reg, &TRI, /*AddIfNotFound=*/ true);
- else
- MI.addRegisterDefined(Reg, &TRI);
- else if (!MO.isUndef() &&
- (MO.isKill() ||
- MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+ if (MO.isUse() && !MO.isUndef() &&
+ (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
} else {
MO.setReg(Reg);
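
MO.substPhysReg(Reg, TRI) folds the operand's virtual subregister index into the assigned physical register and clears the index, which is exactly the open-coded logic the old version carried. A toy rendering of that fold (the sub-register encoding here is a stand-in for TRI.getSubReg):

    struct Operand { unsigned Reg, SubReg; };

    // Stand-in for TRI.getSubReg(Reg, Idx); real targets use tables.
    static unsigned getSubReg(unsigned PhysReg, unsigned Idx) {
      return PhysReg * 8 + Idx;  // toy encoding for the sketch
    }

    // Rewrite a virtual-register operand to physical register PhysReg,
    // folding any subregister index into the physical register choice.
    void substPhysReg(Operand &MO, unsigned PhysReg) {
      if (MO.SubReg) {
        MO.Reg = getSubReg(PhysReg, MO.SubReg);
        MO.SubReg = 0;  // physical operands carry no subreg index
      } else {
        MO.Reg = PhysReg;
      }
    }
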
@@ -460,7 +453,7 @@ public:
/// blocks each of which is a successor of the specified BB and has no other
/// predecessor.
static void findSinglePredSuccessor(MachineBasicBlock *MBB,
- SmallVectorImpl<MachineBasicBlock *> &Succs) {
+ SmallVectorImpl<MachineBasicBlock *> &Succs){
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
SE = MBB->succ_end(); SI != SE; ++SI) {
MachineBasicBlock *SuccMBB = *SI;
@@ -852,8 +845,8 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
// Yup, use the reload register that we didn't use before.
unsigned NewReg = Op.AssignedPhysReg;
Rejected.insert(PhysReg);
- return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, Rejected,
- RegKills, KillOps, VRM);
+ return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores,
+ Rejected, RegKills, KillOps, VRM);
} else {
// Otherwise, we might also have a problem if a previously reused
// value aliases the new register. If so, codegen the previous reload
@@ -1864,7 +1857,7 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) {
/// rewriteMBB - Keep track of which spills are available even after the
-/// register allocator is done with them. If possible, avid reloading vregs.
+/// register allocator is done with them. If possible, avoid reloading vregs.
void
LocalRewriter::RewriteMBB(LiveIntervals *LIs,
AvailableSpills &Spills, BitVector &RegKills,
@@ -1914,7 +1907,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
if (InsertSpills(MII))
NextMII = llvm::next(MII);
- VirtRegMap::MI2VirtMapTy::const_iterator I, End;
bool Erased = false;
bool BackTracked = false;
MachineInstr &MI = *MII;
@@ -2028,14 +2020,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
Spills.canClobberPhysReg(PhysReg);
}
- // If this is an asm, and PhysReg is used elsewhere as an earlyclobber
- // operand, we can't also use it as an input. (Outputs always come
- // before inputs, so we can stop looking at i.)
+ // If this is an asm, and a PhysReg alias is used elsewhere as an
+ // earlyclobber operand, we can't also use it as an input.
if (MI.isInlineAsm()) {
- for (unsigned k=0; k<i; ++k) {
+ for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
MachineOperand &MOk = MI.getOperand(k);
- if (MOk.isReg() && MOk.getReg()==PhysReg && MOk.isEarlyClobber()) {
+ if (MOk.isReg() && MOk.isEarlyClobber() &&
+ TRI->regsOverlap(MOk.getReg(), PhysReg)) {
CanReuse = false;
+ DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg << ": " << MOk << '\n');
break;
}
}
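
Two changes land here: the scan now covers every operand rather than only those before i, and equality is widened to regsOverlap so that aliasing sub- or super-registers also block the reuse. A sketch of the widened test (the overlap oracle is simplified; the real one consults the target's register aliases):

    #include <vector>

    struct Op { unsigned Reg; bool IsEarlyClobber; };

    // Simplified overlap test; TRI->regsOverlap also catches the case
    // where one register is a sub- or super-register of the other.
    static bool regsOverlap(unsigned A, unsigned B) { return A == B; }

    // Reject reuse of PhysReg if any earlyclobber operand aliases it.
    bool canReusePhysReg(const std::vector<Op> &Ops, unsigned PhysReg) {
      for (const Op &O : Ops)
        if (O.IsEarlyClobber && regsOverlap(O.Reg, PhysReg))
          return false;
      return true;
    }
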
@@ -2248,15 +2242,22 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// If we have folded references to memory operands, make sure we clear all
// physical registers that may contain the value of the spilled virtual
// register
+
+ // Copy the folded virts to a small vector; the loop may change MI2VirtMap.
+ SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts;
+ // C++0x FTW!
+ for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator,
+ VirtRegMap::MI2VirtMapTy::const_iterator> FVRange =
+ VRM->getFoldedVirts(&MI);
+ FVRange.first != FVRange.second; ++FVRange.first)
+ FoldedVirts.push_back(FVRange.first->second);
+
SmallSet<int, 2> FoldedSS;
- for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
- unsigned VirtReg = I->second.first;
- VirtRegMap::ModRef MR = I->second.second;
+ for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
+ unsigned VirtReg = FoldedVirts[FVI].first;
+ VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR);
- // MI2VirtMap be can updated which invalidate the iterator.
- // Increment the iterator first.
- ++I;
int SS = VRM->getStackSlot(VirtReg);
if (SS == VirtRegMap::NO_STACK_SLOT)
continue;
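
The snapshot into FoldedVirts replaces the old increment-before-use dance: when the loop body can mutate the map the range iterators point into, copying the range out first makes invalidation a non-issue. The general shape:

    #include <map>
    #include <vector>

    // Snapshot a multimap range before iterating, so the body may freely
    // insert into or erase from the map without invalidating our state.
    void processAll(std::multimap<int, int> &M, int Key) {
      std::vector<int> Snapshot;
      for (auto R = M.equal_range(Key); R.first != R.second; ++R.first)
        Snapshot.push_back(R.first->second);

      for (int V : Snapshot) {
        // The body may now insert into or erase from M; Snapshot's
        // contents are untouched by any such mutation.
        M.insert({Key + 1, V});
      }
    }
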
@@ -2302,7 +2303,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
SmallVector<MachineInstr*, 4> NewMIs;
if (PhysReg &&
- TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)) {
+ TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
MBB->insert(MII, NewMIs[0]);
InvalidateKills(MI, TRI, RegKills, KillOps);
VRM->RemoveMachineInstrFromMaps(&MI);
@@ -2442,28 +2443,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
Spills.disallowClobberPhysReg(VirtReg);
goto ProcessNextInst;
}
- unsigned Src, Dst, SrcSR, DstSR;
- if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
- Src == Dst && SrcSR == DstSR &&
- !MI.findRegisterUseOperand(Src)->isUndef()) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- SmallVector<unsigned, 2> KillRegs;
- InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
- if (MO.isDead() && !KillRegs.empty()) {
- // Source register or an implicit super/sub-register use is killed.
- assert(KillRegs[0] == Dst ||
- TRI->isSubRegister(KillRegs[0], Dst) ||
- TRI->isSuperRegister(KillRegs[0], Dst));
- // Last def is now dead.
- TransferDeadness(Src, RegKills, KillOps);
- }
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
- Erased = true;
- Spills.disallowClobberPhysReg(VirtReg);
- goto ProcessNextInst;
- }
// If it's not a no-op copy, it clobbers the value in the destreg.
Spills.ClobberPhysReg(VirtReg);
@@ -2541,20 +2520,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
UpdateKills(*LastStore, TRI, RegKills, KillOps);
goto ProcessNextInst;
}
- {
- unsigned Src, Dst, SrcSR, DstSR;
- if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
- Src == Dst && SrcSR == DstSR) {
- ++NumDCE;
- DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
- Erased = true;
- UpdateKills(*LastStore, TRI, RegKills, KillOps);
- goto ProcessNextInst;
- }
- }
}
}
ProcessNextInst:
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 5f30dce5855fd..0be80496a3cb7 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -13,6 +13,7 @@
#include "llvm/CompilerDriver/Action.h"
#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/Error.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/SystemUtils.h"
@@ -58,11 +59,15 @@ namespace {
if (prog.isEmpty()) {
prog = FindExecutable(name, ProgramName, (void *)(intptr_t)&Main);
- if (prog.isEmpty())
- throw std::runtime_error("Can't find program '" + name + "'");
+ if (prog.isEmpty()) {
+ PrintError("Can't find program '" + name + "'");
+ return -1;
+ }
+ }
+ if (!prog.canExecute()) {
+ PrintError("Program '" + name + "' is not executable.");
+ return -1;
}
- if (!prog.canExecute())
- throw std::runtime_error("Program '" + name + "' is not executable.");
// Build the command line vector and the redirects array.
const sys::Path* redirects[3] = {0,0,0};
diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp
index d1ac8c98322c1..38442038d7385 100644
--- a/lib/CompilerDriver/BuiltinOptions.cpp
+++ b/lib/CompilerDriver/BuiltinOptions.cpp
@@ -19,7 +19,7 @@
namespace cl = llvm::cl;
-// External linkage here is intentional.
+namespace llvmc {
cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"),
cl::ZeroOrMore);
@@ -57,3 +57,5 @@ cl::opt<SaveTempsEnum::Values> SaveTemps
clEnumValN(SaveTempsEnum::Obj, "", "Same as 'cwd'"),
clEnumValEnd),
cl::ValueOptional);
+
+} // End namespace llvmc.
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index 7d1c7fe4a62b9..d0c0e15bcdb7b 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -25,39 +25,46 @@
#include <iterator>
#include <limits>
#include <queue>
-#include <stdexcept>
using namespace llvm;
using namespace llvmc;
namespace llvmc {
- const std::string& LanguageMap::GetLanguage(const sys::Path& File) const {
+ const std::string* LanguageMap::GetLanguage(const sys::Path& File) const {
StringRef suf = File.getSuffix();
LanguageMap::const_iterator Lang =
this->find(suf.empty() ? "*empty*" : suf);
- if (Lang == this->end())
- throw std::runtime_error("File '" + File.str() +
- "' has unknown suffix '" + suf.str() + '\'');
- return Lang->second;
+ if (Lang == this->end()) {
+ PrintError("File '" + File.str() + "' has unknown suffix '"
+ + suf.str() + '\'');
+ return 0;
+ }
+ return &Lang->second;
}
}
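
This file, and the rest of the CompilerDriver changes below, follow one conversion recipe: a throwing function that returned a reference becomes a function that returns a pointer, reporting through PrintError and yielding null on failure so the library builds without exception support. The recipe in isolation (PrintError here is a local stand-in for llvmc's helper):

    #include <cstdio>
    #include <map>
    #include <string>

    static void PrintError(const std::string &Msg) {  // local stand-in
      std::fprintf(stderr, "error: %s\n", Msg.c_str());
    }

    // Was: a const reference return that throws on a missing key.
    // Now: report and return null; every caller checks for 0.
    const std::string *getValue(const std::map<std::string, std::string> &M,
                                const std::string &Key) {
      std::map<std::string, std::string>::const_iterator I = M.find(Key);
      if (I == M.end()) {
        PrintError("no entry for '" + Key + "'");
        return 0;  // callers propagate the failure upward
      }
      return &I->second;
    }
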
namespace {
- /// ChooseEdge - Return the edge with the maximum weight.
+ /// ChooseEdge - Return the edge with the maximum weight. Returns 0 on error.
template <class C>
const Edge* ChooseEdge(const C& EdgesContainer,
const InputLanguagesSet& InLangs,
const std::string& NodeName = "root") {
const Edge* MaxEdge = 0;
- unsigned MaxWeight = 0;
+ int MaxWeight = 0;
bool SingleMax = true;
+ // TODO: fix calculation of SingleMax.
for (typename C::const_iterator B = EdgesContainer.begin(),
E = EdgesContainer.end(); B != E; ++B) {
const Edge* e = B->getPtr();
- unsigned EW = e->Weight(InLangs);
+ int EW = e->Weight(InLangs);
+ if (EW < 0) {
+ // An (error) invocation in the TableGen description; we don't need to
+ // print an error message here.
+ return 0;
+ }
if (EW > MaxWeight) {
MaxEdge = e;
MaxWeight = EW;
@@ -67,14 +74,16 @@ namespace {
}
}
- if (!SingleMax)
- throw std::runtime_error("Node " + NodeName +
- ": multiple maximal outward edges found!"
- " Most probably a specification error.");
- if (!MaxEdge)
- throw std::runtime_error("Node " + NodeName +
- ": no maximal outward edge found!"
- " Most probably a specification error.");
+ if (!SingleMax) {
+ PrintError("Node " + NodeName + ": multiple maximal outward edges found!"
+ " Most probably a specification error.");
+ return 0;
+ }
+ if (!MaxEdge) {
+ PrintError("Node " + NodeName + ": no maximal outward edge found!"
+ " Most probably a specification error.");
+ return 0;
+ }
return MaxEdge;
}
@@ -98,29 +107,34 @@ CompilationGraph::CompilationGraph() {
NodesMap["root"] = Node(this);
}
-Node& CompilationGraph::getNode(const std::string& ToolName) {
+Node* CompilationGraph::getNode(const std::string& ToolName) {
nodes_map_type::iterator I = NodesMap.find(ToolName);
- if (I == NodesMap.end())
- throw std::runtime_error("Node " + ToolName + " is not in the graph");
- return I->second;
+ if (I == NodesMap.end()) {
+ PrintError("Node " + ToolName + " is not in the graph");
+ return 0;
+ }
+ return &I->second;
}
-const Node& CompilationGraph::getNode(const std::string& ToolName) const {
+const Node* CompilationGraph::getNode(const std::string& ToolName) const {
nodes_map_type::const_iterator I = NodesMap.find(ToolName);
- if (I == NodesMap.end())
- throw std::runtime_error("Node " + ToolName + " is not in the graph!");
- return I->second;
+ if (I == NodesMap.end()) {
+ PrintError("Node " + ToolName + " is not in the graph!");
+ return 0;
+ }
+ return &I->second;
}
// Find the tools list corresponding to the given language name.
-const CompilationGraph::tools_vector_type&
+const CompilationGraph::tools_vector_type*
CompilationGraph::getToolsVector(const std::string& LangName) const
{
tools_map_type::const_iterator I = ToolsMap.find(LangName);
- if (I == ToolsMap.end())
- throw std::runtime_error("No tool corresponding to the language "
- + LangName + " found");
- return I->second;
+ if (I == ToolsMap.end()) {
+ PrintError("No tool corresponding to the language " + LangName + " found");
+ return 0;
+ }
+ return &I->second;
}
void CompilationGraph::insertNode(Tool* V) {
@@ -128,29 +142,37 @@ void CompilationGraph::insertNode(Tool* V) {
NodesMap[V->Name()] = Node(this, V);
}
-void CompilationGraph::insertEdge(const std::string& A, Edge* Edg) {
- Node& B = getNode(Edg->ToolName());
+int CompilationGraph::insertEdge(const std::string& A, Edge* Edg) {
+ Node* B = getNode(Edg->ToolName());
+ if (B == 0)
+ return 1;
+
if (A == "root") {
- const char** InLangs = B.ToolPtr->InputLanguages();
+ const char** InLangs = B->ToolPtr->InputLanguages();
for (;*InLangs; ++InLangs)
ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr<Edge>(Edg));
NodesMap["root"].AddEdge(Edg);
}
else {
- Node& N = getNode(A);
- N.AddEdge(Edg);
+ Node* N = getNode(A);
+ if (N == 0)
+ return 1;
+
+ N->AddEdge(Edg);
}
// Increase the inward edge counter.
- B.IncrInEdges();
+ B->IncrInEdges();
+
+ return 0;
}
// Pass input file through the chain until we bump into a Join node or
// a node that says that it is the last.
-void CompilationGraph::PassThroughGraph (const sys::Path& InFile,
- const Node* StartNode,
- const InputLanguagesSet& InLangs,
- const sys::Path& TempDir,
- const LanguageMap& LangMap) const {
+int CompilationGraph::PassThroughGraph (const sys::Path& InFile,
+ const Node* StartNode,
+ const InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) const {
sys::Path In = InFile;
const Node* CurNode = StartNode;
@@ -158,25 +180,35 @@ void CompilationGraph::PassThroughGraph (const sys::Path& InFile,
Tool* CurTool = CurNode->ToolPtr.getPtr();
if (CurTool->IsJoin()) {
- JoinTool& JT = dynamic_cast<JoinTool&>(*CurTool);
+ JoinTool& JT = static_cast<JoinTool&>(*CurTool);
JT.AddToJoinList(In);
break;
}
- Action CurAction = CurTool->GenerateAction(In, CurNode->HasChildren(),
- TempDir, InLangs, LangMap);
+ Action CurAction;
+ if (int ret = CurTool->GenerateAction(CurAction, In, CurNode->HasChildren(),
+ TempDir, InLangs, LangMap)) {
+ return ret;
+ }
if (int ret = CurAction.Execute())
- throw error_code(ret);
+ return ret;
if (CurAction.StopCompilation())
- return;
+ return 0;
+
+ const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+ if (Edg == 0)
+ return 1;
+
+ CurNode = getNode(Edg->ToolName());
+ if (CurNode == 0)
+ return 1;
- CurNode = &getNode(ChooseEdge(CurNode->OutEdges,
- InLangs,
- CurNode->Name())->ToolName());
In = CurAction.OutFile();
}
+
+ return 0;
}
// Find the head of the toolchain corresponding to the given file.
@@ -186,26 +218,39 @@ FindToolChain(const sys::Path& In, const std::string* ForceLanguage,
InputLanguagesSet& InLangs, const LanguageMap& LangMap) const {
// Determine the input language.
- const std::string& InLanguage =
- ForceLanguage ? *ForceLanguage : LangMap.GetLanguage(In);
+ const std::string* InLang = LangMap.GetLanguage(In);
+ if (InLang == 0)
+ return 0;
+ const std::string& InLanguage = (ForceLanguage ? *ForceLanguage : *InLang);
// Add the current input language to the input language set.
InLangs.insert(InLanguage);
// Find the toolchain for the input language.
- const tools_vector_type& TV = getToolsVector(InLanguage);
- if (TV.empty())
- throw std::runtime_error("No toolchain corresponding to language "
- + InLanguage + " found");
- return &getNode(ChooseEdge(TV, InLangs)->ToolName());
+ const tools_vector_type* pTV = getToolsVector(InLanguage);
+ if (pTV == 0)
+ return 0;
+
+ const tools_vector_type& TV = *pTV;
+ if (TV.empty()) {
+ PrintError("No toolchain corresponding to language "
+ + InLanguage + " found");
+ return 0;
+ }
+
+ const Edge* Edg = ChooseEdge(TV, InLangs);
+ if (Edg == 0)
+ return 0;
+
+ return getNode(Edg->ToolName());
}
// Helper function used by Build().
// Traverses initial portions of the toolchains (up to the first Join node).
// This function is also responsible for handling the -x option.
-void CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
- const sys::Path& TempDir,
- const LanguageMap& LangMap) {
+int CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
+ const sys::Path& TempDir,
+ const LanguageMap& LangMap) {
// This is related to -x option handling.
cl::list<std::string>::const_iterator xIter = Languages.begin(),
xBegin = xIter, xEnd = Languages.end();
@@ -255,15 +300,25 @@ void CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
// Find the toolchain corresponding to this file.
const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap);
+ if (N == 0)
+ return 1;
// Pass file through the chain starting at head.
- PassThroughGraph(In, N, InLangs, TempDir, LangMap);
+ if (int ret = PassThroughGraph(In, N, InLangs, TempDir, LangMap))
+ return ret;
}
+
+ return 0;
}
// Sort the nodes in topological order.
-void CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
+int CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
std::queue<const Node*> Q;
- Q.push(&getNode("root"));
+
+ Node* Root = getNode("root");
+ if (Root == 0)
+ return 1;
+
+ Q.push(Root);
while (!Q.empty()) {
const Node* A = Q.front();
@@ -271,12 +326,17 @@ void CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
Out.push_back(A);
for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
EB != EE; ++EB) {
- Node* B = &getNode((*EB)->ToolName());
+ Node* B = getNode((*EB)->ToolName());
+ if (B == 0)
+ return 1;
+
B->DecrInEdges();
if (B->HasNoInEdges())
Q.push(B);
}
}
+
+ return 0;
}
namespace {
@@ -287,49 +347,71 @@ namespace {
// Call TopologicalSort and filter the resulting list to include
// only Join nodes.
-void CompilationGraph::
+int CompilationGraph::
TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) {
std::vector<const Node*> TopSorted;
- TopologicalSort(TopSorted);
+ if (int ret = TopologicalSort(TopSorted))
+ return ret;
std::remove_copy_if(TopSorted.begin(), TopSorted.end(),
std::back_inserter(Out), NotJoinNode);
+
+ return 0;
}
int CompilationGraph::Build (const sys::Path& TempDir,
const LanguageMap& LangMap) {
-
InputLanguagesSet InLangs;
+ bool WasSomeActionGenerated = !InputFilenames.empty();
// Traverse initial parts of the toolchains and fill in InLangs.
- BuildInitial(InLangs, TempDir, LangMap);
+ if (int ret = BuildInitial(InLangs, TempDir, LangMap))
+ return ret;
std::vector<const Node*> JTV;
- TopologicalSortFilterJoinNodes(JTV);
+ if (int ret = TopologicalSortFilterJoinNodes(JTV))
+ return ret;
// For all join nodes in topological order:
for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end();
B != E; ++B) {
const Node* CurNode = *B;
- JoinTool* JT = &dynamic_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
+ JoinTool* JT = &static_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
// Are there any files in the join list?
if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty()))
continue;
- Action CurAction = JT->GenerateAction(CurNode->HasChildren(),
- TempDir, InLangs, LangMap);
+ WasSomeActionGenerated = true;
+ Action CurAction;
+ if (int ret = JT->GenerateAction(CurAction, CurNode->HasChildren(),
+ TempDir, InLangs, LangMap)) {
+ return ret;
+ }
if (int ret = CurAction.Execute())
- throw error_code(ret);
+ return ret;
if (CurAction.StopCompilation())
return 0;
- const Node* NextNode = &getNode(ChooseEdge(CurNode->OutEdges, InLangs,
- CurNode->Name())->ToolName());
- PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode,
- InLangs, TempDir, LangMap);
+ const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+ if (Edg == 0)
+ return 1;
+
+ const Node* NextNode = getNode(Edg->ToolName());
+ if (NextNode == 0)
+ return 1;
+
+ if (int ret = PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode,
+ InLangs, TempDir, LangMap)) {
+ return ret;
+ }
+ }
+
+ if (!WasSomeActionGenerated) {
+ PrintError("no input files");
+ return 1;
}
return 0;
@@ -337,6 +419,7 @@ int CompilationGraph::Build (const sys::Path& TempDir,
int CompilationGraph::CheckLanguageNames() const {
int ret = 0;
+
// Check that names for output and input languages on all edges do match.
for (const_nodes_iterator B = this->NodesMap.begin(),
E = this->NodesMap.end(); B != E; ++B) {
@@ -345,9 +428,11 @@ int CompilationGraph::CheckLanguageNames() const {
if (N1.ToolPtr) {
for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd();
EB != EE; ++EB) {
- const Node& N2 = this->getNode((*EB)->ToolName());
+ const Node* N2 = this->getNode((*EB)->ToolName());
+ if (N2 == 0)
+ return 1;
- if (!N2.ToolPtr) {
+ if (!N2->ToolPtr) {
++ret;
errs() << "Error: there is an edge from '" << N1.ToolPtr->Name()
<< "' back to the root!\n\n";
@@ -355,7 +440,7 @@ int CompilationGraph::CheckLanguageNames() const {
}
const char* OutLang = N1.ToolPtr->OutputLanguage();
- const char** InLangs = N2.ToolPtr->InputLanguages();
+ const char** InLangs = N2->ToolPtr->InputLanguages();
bool eq = false;
for (;*InLangs; ++InLangs) {
if (std::strcmp(OutLang, *InLangs) == 0) {
@@ -367,11 +452,11 @@ int CompilationGraph::CheckLanguageNames() const {
if (!eq) {
++ret;
errs() << "Error: Output->input language mismatch in the edge '"
- << N1.ToolPtr->Name() << "' -> '" << N2.ToolPtr->Name()
+ << N1.ToolPtr->Name() << "' -> '" << N2->ToolPtr->Name()
<< "'!\n"
<< "Expected one of { ";
- InLangs = N2.ToolPtr->InputLanguages();
+ InLangs = N2->ToolPtr->InputLanguages();
for (;*InLangs; ++InLangs) {
errs() << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'");
}
@@ -395,7 +480,7 @@ int CompilationGraph::CheckMultipleDefaultEdges() const {
for (const_nodes_iterator B = this->NodesMap.begin(),
E = this->NodesMap.end(); B != E; ++B) {
const Node& N = B->second;
- unsigned MaxWeight = 0;
+ int MaxWeight = 0;
// Ignore the root node.
if (!N.ToolPtr)
@@ -403,7 +488,7 @@ int CompilationGraph::CheckMultipleDefaultEdges() const {
for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd();
EB != EE; ++EB) {
- unsigned EdgeWeight = (*EB)->Weight(Dummy);
+ int EdgeWeight = (*EB)->Weight(Dummy);
if (EdgeWeight > MaxWeight) {
MaxWeight = EdgeWeight;
}
@@ -422,7 +507,12 @@ int CompilationGraph::CheckMultipleDefaultEdges() const {
int CompilationGraph::CheckCycles() {
unsigned deleted = 0;
std::queue<Node*> Q;
- Q.push(&getNode("root"));
+
+ Node* Root = getNode("root");
+ if (Root == 0)
+ return 1;
+
+ Q.push(Root);
// Try to delete all nodes that have no ingoing edges, starting from the
// root. If there are any nodes left after this operation, then we have a
@@ -434,7 +524,10 @@ int CompilationGraph::CheckCycles() {
for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
EB != EE; ++EB) {
- Node* B = &getNode((*EB)->ToolName());
+ Node* B = getNode((*EB)->ToolName());
+ if (B == 0)
+ return 1;
+
B->DecrInEdges();
if (B->HasNoInEdges())
Q.push(B);
@@ -453,18 +546,28 @@ int CompilationGraph::CheckCycles() {
int CompilationGraph::Check () {
// We try to catch as many errors as we can in one go.
+ int errs = 0;
int ret = 0;
// Check that output/input language names match.
- ret += this->CheckLanguageNames();
+ ret = this->CheckLanguageNames();
+ if (ret < 0)
+ return 1;
+ errs += ret;
// Check for multiple default edges.
- ret += this->CheckMultipleDefaultEdges();
+ ret = this->CheckMultipleDefaultEdges();
+ if (ret < 0)
+ return 1;
+ errs += ret;
// Check for cycles.
- ret += this->CheckCycles();
+ ret = this->CheckCycles();
+ if (ret < 0)
+ return 1;
+ errs += ret;
- return ret;
+ return errs;
}
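
Check() now distinguishes two failure modes: a checker returning a negative value signals an infrastructure failure (stop immediately and report it), while non-negative returns are specification-error counts to accumulate. A minimal sketch of that convention:

    // Each checker returns a count of specification errors (>= 0), or a
    // negative value for a hard failure that makes further checks moot.
    int runChecks(int (*Checks[])(), unsigned N) {
      int Errs = 0;
      for (unsigned I = 0; I != N; ++I) {
        int Ret = Checks[I]();
        if (Ret < 0)
          return 1;   // hard failure, not an error count
        Errs += Ret;
      }
      return Errs;
    }
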
// Code related to graph visualization.
@@ -516,7 +619,7 @@ namespace llvm {
}
-void CompilationGraph::writeGraph(const std::string& OutputFilename) {
+int CompilationGraph::writeGraph(const std::string& OutputFilename) {
std::string ErrorInfo;
raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo);
@@ -526,9 +629,11 @@ void CompilationGraph::writeGraph(const std::string& OutputFilename) {
errs() << "done.\n";
}
else {
- throw std::runtime_error("Error opening file '" + OutputFilename
- + "' for writing!");
+ PrintError("Error opening file '" + OutputFilename + "' for writing!");
+ return 1;
}
+
+ return 0;
}
void CompilationGraph::viewGraph() {
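
Every traversal routine in this file now returns an int and forwards non-zero codes upward, keeping the old exception-unwinding behaviour (abort the pipeline at the first failure) without exceptions. The idiom, reduced to its bones:

    int stepOne() { return 0; }   // 0 = success, non-zero = failure
    int stepTwo() { return 0; }

    int runPipeline() {
      if (int Ret = stepOne())
        return Ret;               // first failure aborts, code preserved
      if (int Ret = stepTwo())
        return Ret;
      return 0;
    }
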
diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp
index b5e507dfc3a37..0a6613aa77a3b 100644
--- a/lib/CompilerDriver/Main.cpp
+++ b/lib/CompilerDriver/Main.cpp
@@ -11,16 +11,15 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CompilerDriver/AutoGenerated.h"
#include "llvm/CompilerDriver/BuiltinOptions.h"
#include "llvm/CompilerDriver/CompilationGraph.h"
#include "llvm/CompilerDriver/Error.h"
-#include "llvm/CompilerDriver/Plugin.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
#include <sstream>
-#include <stdexcept>
#include <string>
namespace cl = llvm::cl;
@@ -31,9 +30,9 @@ namespace {
std::stringstream* GlobalTimeLog;
- sys::Path getTempDir() {
- sys::Path tempDir;
-
+ /// GetTempDir - Get the temporary directory location. Returns non-zero value
+ /// on error.
+ int GetTempDir(sys::Path& tempDir) {
// The --temp-dir option.
if (!TempDirname.empty()) {
tempDir = TempDirname;
@@ -41,7 +40,7 @@ namespace {
// GCC 4.5-style -save-temps handling.
else if (SaveTemps == SaveTempsEnum::Unset) {
tempDir = sys::Path::GetTemporaryDirectory();
- return tempDir;
+ return 0;
}
else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
tempDir = OutputFilename;
@@ -49,35 +48,35 @@ namespace {
}
else {
// SaveTemps == Cwd --> use current dir (leave tempDir empty).
- return tempDir;
+ return 0;
}
if (!tempDir.exists()) {
std::string ErrMsg;
- if (tempDir.createDirectoryOnDisk(true, &ErrMsg))
- throw std::runtime_error(ErrMsg);
+ if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) {
+ PrintError(ErrMsg);
+ return 1;
+ }
}
- return tempDir;
+ return 0;
}
- /// BuildTargets - A small wrapper for CompilationGraph::Build.
+ /// BuildTargets - A small wrapper for CompilationGraph::Build. Returns
+ /// non-zero value in case of error.
int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) {
int ret;
- const sys::Path& tempDir = getTempDir();
+ sys::Path tempDir;
bool toDelete = (SaveTemps == SaveTempsEnum::Unset);
- try {
- ret = graph.Build(tempDir, langMap);
- }
- catch(...) {
- if (toDelete)
- tempDir.eraseFromDisk(true);
- throw;
- }
+ if (int ret = GetTempDir(tempDir))
+ return ret;
+
+ ret = graph.Build(tempDir, langMap);
if (toDelete)
tempDir.eraseFromDisk(true);
+
return ret;
}
}
@@ -89,68 +88,58 @@ void AppendToGlobalTimeLog(const std::string& cmd, double time) {
*GlobalTimeLog << "# " << cmd << ' ' << time << '\n';
}
-// Sometimes plugins want to condition on the value in argv[0].
+// Sometimes user code wants to access the argv[0] value.
const char* ProgramName;
int Main(int argc, char** argv) {
- try {
- LanguageMap langMap;
- CompilationGraph graph;
-
- ProgramName = argv[0];
+ int ret = 0;
+ LanguageMap langMap;
+ CompilationGraph graph;
- cl::ParseCommandLineOptions
- (argc, argv, "LLVM Compiler Driver (Work In Progress)",
- /* ReadResponseFiles = */ false);
+ ProgramName = argv[0];
- PluginLoader Plugins;
- Plugins.RunInitialization(langMap, graph);
+ cl::ParseCommandLineOptions
+ (argc, argv,
+ /* Overview = */ "LLVM Compiler Driver (Work In Progress)",
+ /* ReadResponseFiles = */ false);
- if (CheckGraph) {
- int ret = graph.Check();
- if (!ret)
- llvm::errs() << "check-graph: no errors found.\n";
+ if (int ret = autogenerated::RunInitialization(langMap, graph))
+ return ret;
- return ret;
- }
+ if (CheckGraph) {
+ ret = graph.Check();
+ if (!ret)
+ llvm::errs() << "check-graph: no errors found.\n";
- if (ViewGraph) {
- graph.viewGraph();
- if (!WriteGraph)
- return 0;
- }
+ return ret;
+ }
- if (WriteGraph) {
- graph.writeGraph(OutputFilename.empty()
- ? std::string("compilation-graph.dot")
- : OutputFilename);
+ if (ViewGraph) {
+ graph.viewGraph();
+ if (!WriteGraph)
return 0;
- }
+ }
- if (Time) {
- GlobalTimeLog = new std::stringstream;
- GlobalTimeLog->precision(2);
- }
+ if (WriteGraph) {
+ const std::string& Out = (OutputFilename.empty()
+ ? std::string("compilation-graph.dot")
+ : OutputFilename);
+ return graph.writeGraph(Out);
+ }
- int ret = BuildTargets(graph, langMap);
+ if (Time) {
+ GlobalTimeLog = new std::stringstream;
+ GlobalTimeLog->precision(2);
+ }
- if (Time) {
- llvm::errs() << GlobalTimeLog->str();
- delete GlobalTimeLog;
- }
+ ret = BuildTargets(graph, langMap);
- return ret;
- }
- catch(llvmc::error_code& ec) {
- return ec.code();
+ if (Time) {
+ llvm::errs() << GlobalTimeLog->str();
+ delete GlobalTimeLog;
}
- catch(const std::exception& ex) {
- llvm::errs() << argv[0] << ": " << ex.what() << '\n';
- }
- catch(...) {
- llvm::errs() << argv[0] << ": unknown error!\n";
- }
- return 1;
+
+ return ret;
}
} // end namespace llvmc
diff --git a/lib/CompilerDriver/Makefile b/lib/CompilerDriver/Makefile
index 66c6d11552fc7..8e8b73ca8f83f 100644
--- a/lib/CompilerDriver/Makefile
+++ b/lib/CompilerDriver/Makefile
@@ -10,39 +10,11 @@
LEVEL = ../..
# We don't want this library to appear in `llvm-config --libs` output, so its
-# name doesn't start with "LLVM".
+# name doesn't start with "LLVM" and NO_LLVM_CONFIG is set.
-ifeq ($(ENABLE_LLVMC_DYNAMIC),1)
- LIBRARYNAME = libCompilerDriver
- LLVMLIBS = LLVMSupport.a LLVMSystem.a
- LOADABLE_MODULE := 1
-else
- LIBRARYNAME = CompilerDriver
- LINK_COMPONENTS = support system
-endif
+LIBRARYNAME = CompilerDriver
+LINK_COMPONENTS = support system
+NO_LLVM_CONFIG = 1
-REQUIRES_EH := 1
-REQUIRES_RTTI := 1
include $(LEVEL)/Makefile.common
-
-ifeq ($(ENABLE_LLVMC_DYNAMIC_PLUGINS), 1)
- CPP.Flags += -DENABLE_LLVMC_DYNAMIC_PLUGINS
-endif
-
-# Copy libCompilerDriver to the bin dir so that llvmc can find it.
-ifeq ($(ENABLE_LLVMC_DYNAMIC),1)
-
-FullLibName = $(LIBRARYNAME)$(SHLIBEXT)
-
-all-local:: $(ToolDir)/$(FullLibName)
-
-$(ToolDir)/$(FullLibName): $(LibDir)/$(FullLibName) $(ToolDir)/.dir
- $(Echo) Copying $(BuildMode) Shared Library $(FullLibName) to $@
- -$(Verb) $(CP) $< $@
-
-clean-local::
- $(Echo) Removing $(BuildMode) Shared Library $(FullLibName) \
- from $(ToolDir)
- -$(Verb) $(RM) -f $(ToolDir)/$(FullLibName)
-endif
diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp
deleted file mode 100644
index 0fdfef4c6a29b..0000000000000
--- a/lib/CompilerDriver/Plugin.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-//===--- Plugin.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open
-// Source License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Plugin support.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CompilerDriver/Plugin.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/System/Mutex.h"
-#include <algorithm>
-#include <vector>
-
-namespace {
-
- // Registry::Add<> does not do lifetime management (probably issues
- // with static constructor/destructor ordering), so we have to
- // implement it here.
- //
- // All this static registration/life-before-main model seems
- // unnecessary convoluted to me.
-
- static bool pluginListInitialized = false;
- typedef std::vector<const llvmc::BasePlugin*> PluginList;
- static PluginList Plugins;
- static llvm::ManagedStatic<llvm::sys::SmartMutex<true> > PluginMutex;
-
- struct ByPriority {
- bool operator()(const llvmc::BasePlugin* lhs,
- const llvmc::BasePlugin* rhs) {
- return lhs->Priority() < rhs->Priority();
- }
- };
-}
-
-namespace llvmc {
-
- PluginLoader::PluginLoader() {
- llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
- if (!pluginListInitialized) {
- for (PluginRegistry::iterator B = PluginRegistry::begin(),
- E = PluginRegistry::end(); B != E; ++B)
- Plugins.push_back(B->instantiate());
- std::sort(Plugins.begin(), Plugins.end(), ByPriority());
- }
- pluginListInitialized = true;
- }
-
- PluginLoader::~PluginLoader() {
- llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
- if (pluginListInitialized) {
- for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
- B != E; ++B)
- delete (*B);
- }
- pluginListInitialized = false;
- }
-
- void PluginLoader::RunInitialization(LanguageMap& langMap,
- CompilationGraph& graph) const
- {
- llvm::sys::SmartScopedLock<true> Lock(*PluginMutex);
- for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
- B != E; ++B) {
- const BasePlugin* BP = *B;
- BP->PreprocessOptions();
- BP->PopulateLanguageMap(langMap);
- BP->PopulateCompilationGraph(graph);
- }
- }
-
-}
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index c7495d442d9cf..f8f1f4a78ee53 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -236,6 +236,10 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
return 1;
}
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) {
+ return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn));
+}
+
LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) {
return wrap(unwrap(EE)->getTargetData());
}
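
A hypothetical call site for the new binding, assuming the matching declaration also lands in llvm-c/ExecutionEngine.h; only the signature added above is relied on:

    #include "llvm-c/ExecutionEngine.h"

    // After mutating Fn's IR, ask the JIT for fresh machine code and a
    // pointer to it; old call sites are relinked to the new body.
    void *refreshFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) {
      return LLVMRecompileAndRelinkFunction(EE, Fn);
    }
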
diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp
index b367033d32b56..274f816f39e11 100644
--- a/lib/ExecutionEngine/JIT/Intercept.cpp
+++ b/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -89,6 +89,10 @@ static int jit_atexit(void (*Fn)()) {
return 0; // Always successful
}
+static int jit_noop() {
+ return 0;
+}
+
//===----------------------------------------------------------------------===//
//
/// getPointerToNamedFunction - This method returns the address of the specified
@@ -104,6 +108,14 @@ void *JIT::getPointerToNamedFunction(const std::string &Name,
if (Name == "exit") return (void*)(intptr_t)&jit_exit;
if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+ // We should not invoke the host's ctors/dtors from the generated main()!
+ // On MinGW and Cygwin, the symbol __main would otherwise resolve to the
+ // host's (e.g. tools/lli) copy, invoking the wrong, duplicated ctors
+ // (and registering the wrong dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors() to be
+ // called before ExecutionEngine::runFunctionAsMain().
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
const char *NameStr = Name.c_str();
// If this is an asm specifier, skip the sentinel.
if (NameStr[0] == 1) ++NameStr;
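
The __main special case slots into the same interception list as exit/atexit above: certain names must never resolve into the host process. The shape of that pre-lookup filter (stand-alone sketch, not the JIT's full resolver):

    #include <cstdint>
    #include <string>

    static int jit_noop() { return 0; }

    // Return an override address for symbols the JIT must intercept, or
    // null to fall through to the normal dynamic-library lookup.
    void *interceptSymbol(const std::string &Name) {
      if (Name == "__main")
        return (void *)(std::intptr_t)&jit_noop;  // host __main re-runs ctors
      return 0;
    }
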
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 67bd3ed10ad97..63125b79c8e26 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -67,7 +67,7 @@ extern "C" void LLVMLinkInJIT() {
}
-#if defined(__GNUC__) && !defined(__ARM__EABI__)
+#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__)
// libgcc defines the __register_frame function to dynamically register new
// dwarf frames for exception handling. This functionality is not portable
@@ -219,10 +219,8 @@ ExecutionEngine *JIT::createJIT(Module *M,
StringRef MArch,
StringRef MCPU,
const SmallVectorImpl<std::string>& MAttrs) {
- // Make sure we can resolve symbols in the program as well. The zero arg
- // to the function tells DynamicLibrary to load the program, not a library.
- if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
- return 0;
+ // Try to register the program as a source of symbols to resolve against.
+ sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
// Pick a target either via -march or by guessing the native arch.
TargetMachine *TM = JIT::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
@@ -308,7 +306,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
}
// Register routine for informing unwinding runtime about new EH frames
-#if defined(__GNUC__) && !defined(__ARM_EABI__)
+#if defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__USING_SJLJ_EXCEPTIONS__)
#if USE_KEYMGR
struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
_keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 749a57d92c948..6e11a3cd9368d 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -90,8 +90,8 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
// section. This allows GDB to get a good stack trace, particularly on
// linux x86_64. Mark this as a PROGBITS section that needs to be loaded
// into memory at runtime.
- ELFSection &EH = EW.getSection(".eh_frame", ELFSection::SHT_PROGBITS,
- ELFSection::SHF_ALLOC);
+ ELFSection &EH = EW.getSection(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
// Pointers in the DWARF EH info are all relative to the EH frame start,
// which is stored here.
EH.Addr = (uint64_t)I.EhStart;
@@ -102,9 +102,9 @@ std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
// Add this single function to the symbol table, so the debugger prints the
// name instead of '???'. We give the symbol default global visibility.
ELFSym *FnSym = ELFSym::getGV(F,
- ELFSym::STB_GLOBAL,
- ELFSym::STT_FUNC,
- ELFSym::STV_DEFAULT);
+ ELF::STB_GLOBAL,
+ ELF::STT_FUNC,
+ ELF::STV_DEFAULT);
FnSym->SectionIdx = Text.SectionIdx;
FnSym->Size = I.FnEnd - I.FnStart;
FnSym->Value = 0; // Offset from start of section.
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index 4b3ca8759b8ac..1105bcc0437f2 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -595,443 +595,3 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality,
return StartEHPtr;
}
-
-unsigned JITDwarfEmitter::GetDwarfTableSizeInBytes(MachineFunction& F,
- JITCodeEmitter& jce,
- unsigned char* StartFunction,
- unsigned char* EndFunction) {
- const TargetMachine& TM = F.getTarget();
- TD = TM.getTargetData();
- stackGrowthDirection = TM.getFrameInfo()->getStackGrowthDirection();
- RI = TM.getRegisterInfo();
- JCE = &jce;
- unsigned FinalSize = 0;
-
- FinalSize += GetExceptionTableSizeInBytes(&F);
-
- const std::vector<const Function *> Personalities = MMI->getPersonalities();
- FinalSize +=
- GetCommonEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()]);
-
- FinalSize += GetEHFrameSizeInBytes(Personalities[MMI->getPersonalityIndex()],
- StartFunction);
-
- return FinalSize;
-}
-
-/// RoundUpToAlign - Add the specified alignment to FinalSize and returns
-/// the new value.
-static unsigned RoundUpToAlign(unsigned FinalSize, unsigned Alignment) {
- if (Alignment == 0) Alignment = 1;
- // Since we do not know where the buffer will be allocated, be pessimistic.
- return FinalSize + Alignment;
-}
-
-unsigned
-JITDwarfEmitter::GetEHFrameSizeInBytes(const Function* Personality,
- unsigned char* StartFunction) const {
- unsigned PointerSize = TD->getPointerSize();
- unsigned FinalSize = 0;
- // EH frame header.
- FinalSize += PointerSize;
- // FDE CIE Offset
- FinalSize += 3 * PointerSize;
- // If there is a personality and landing pads then point to the language
- // specific data area in the exception table.
- if (Personality) {
- FinalSize += MCAsmInfo::getULEB128Size(4);
- FinalSize += PointerSize;
- } else {
- FinalSize += MCAsmInfo::getULEB128Size(0);
- }
-
- // Indicate locations of function specific callee saved registers in
- // frame.
- FinalSize += GetFrameMovesSizeInBytes((intptr_t)StartFunction,
- MMI->getFrameMoves());
-
- FinalSize = RoundUpToAlign(FinalSize, 4);
-
- // Double zeroes for the unwind runtime
- FinalSize += 2 * PointerSize;
-
- return FinalSize;
-}
-
-unsigned JITDwarfEmitter::GetCommonEHFrameSizeInBytes(const Function* Personality)
- const {
-
- unsigned PointerSize = TD->getPointerSize();
- int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
- PointerSize : -PointerSize;
- unsigned FinalSize = 0;
- // EH Common Frame header
- FinalSize += PointerSize;
- FinalSize += 4;
- FinalSize += 1;
- FinalSize += Personality ? 5 : 3; // "zPLR" or "zR"
- FinalSize += MCAsmInfo::getULEB128Size(1);
- FinalSize += MCAsmInfo::getSLEB128Size(stackGrowth);
- FinalSize += 1;
-
- if (Personality) {
- FinalSize += MCAsmInfo::getULEB128Size(7);
-
- // Encoding
- FinalSize+= 1;
- //Personality
- FinalSize += PointerSize;
-
- FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
- FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
-
- } else {
- FinalSize += MCAsmInfo::getULEB128Size(1);
- FinalSize += MCAsmInfo::getULEB128Size(dwarf::DW_EH_PE_pcrel);
- }
-
- std::vector<MachineMove> Moves;
- RI->getInitialFrameState(Moves);
- FinalSize += GetFrameMovesSizeInBytes(0, Moves);
- FinalSize = RoundUpToAlign(FinalSize, 4);
- return FinalSize;
-}
-
-unsigned
-JITDwarfEmitter::GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
- const std::vector<MachineMove> &Moves) const {
- unsigned PointerSize = TD->getPointerSize();
- int stackGrowth = stackGrowthDirection == TargetFrameInfo::StackGrowsUp ?
- PointerSize : -PointerSize;
- bool IsLocal = BaseLabelPtr;
- unsigned FinalSize = 0;
-
- for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
- const MachineMove &Move = Moves[i];
- MCSymbol *Label = Move.getLabel();
-
- // Throw out move if the label is invalid.
- if (Label && (*JCE->getLabelLocations())[Label] == 0)
- continue;
-
- intptr_t LabelPtr = 0;
- if (Label) LabelPtr = JCE->getLabelAddress(Label);
-
- const MachineLocation &Dst = Move.getDestination();
- const MachineLocation &Src = Move.getSource();
-
- // Advance row if new location.
- if (BaseLabelPtr && Label && (BaseLabelPtr != LabelPtr || !IsLocal)) {
- FinalSize++;
- FinalSize += PointerSize;
- BaseLabelPtr = LabelPtr;
- IsLocal = true;
- }
-
- // If advancing cfa.
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
- if (!Src.isReg()) {
- if (Src.getReg() == MachineLocation::VirtualFP) {
- ++FinalSize;
- } else {
- ++FinalSize;
- unsigned RegNum = RI->getDwarfRegNum(Src.getReg(), true);
- FinalSize += MCAsmInfo::getULEB128Size(RegNum);
- }
-
- int Offset = -Src.getOffset();
-
- FinalSize += MCAsmInfo::getULEB128Size(Offset);
- } else {
- llvm_unreachable("Machine move no supported yet.");
- }
- } else if (Src.isReg() &&
- Src.getReg() == MachineLocation::VirtualFP) {
- if (Dst.isReg()) {
- ++FinalSize;
- unsigned RegNum = RI->getDwarfRegNum(Dst.getReg(), true);
- FinalSize += MCAsmInfo::getULEB128Size(RegNum);
- } else {
- llvm_unreachable("Machine move no supported yet.");
- }
- } else {
- unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
- int Offset = Dst.getOffset() / stackGrowth;
-
- if (Offset < 0) {
- ++FinalSize;
- FinalSize += MCAsmInfo::getULEB128Size(Reg);
- FinalSize += MCAsmInfo::getSLEB128Size(Offset);
- } else if (Reg < 64) {
- ++FinalSize;
- FinalSize += MCAsmInfo::getULEB128Size(Offset);
- } else {
- ++FinalSize;
- FinalSize += MCAsmInfo::getULEB128Size(Reg);
- FinalSize += MCAsmInfo::getULEB128Size(Offset);
- }
- }
- }
- return FinalSize;
-}
-
-unsigned
-JITDwarfEmitter::GetExceptionTableSizeInBytes(MachineFunction* MF) const {
- unsigned FinalSize = 0;
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads(JCE->getLabelLocations());
-
- const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
- if (PadInfos.empty()) return 0;
-
- // Sort the landing pads in order of their type ids. This is used to fold
- // duplicate actions.
- SmallVector<const LandingPadInfo *, 64> LandingPads;
- LandingPads.reserve(PadInfos.size());
- for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
- LandingPads.push_back(&PadInfos[i]);
- std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
-
- // Negative type ids index into FilterIds, positive type ids index into
- // TypeInfos. The value written for a positive type id is just the type
- // id itself. For a negative type id, however, the value written is the
- // (negative) byte offset of the corresponding FilterIds entry. The byte
- // offset is usually equal to the type id, because the FilterIds entries
- // are written using a variable width encoding which outputs one byte per
- // entry as long as the value written is not too large, but can differ.
- // This kind of complication does not occur for positive type ids because
- // type infos are output using a fixed width encoding.
- // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
- SmallVector<int, 16> FilterOffsets;
- FilterOffsets.reserve(FilterIds.size());
- int Offset = -1;
- for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
- E = FilterIds.end(); I != E; ++I) {
- FilterOffsets.push_back(Offset);
- Offset -= MCAsmInfo::getULEB128Size(*I);
- }
-
- // Compute the actions table and gather the first action index for each
- // landing pad site.
- SmallVector<ActionEntry, 32> Actions;
- SmallVector<unsigned, 64> FirstActions;
- FirstActions.reserve(LandingPads.size());
-
- int FirstAction = 0;
- unsigned SizeActions = 0;
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
- const LandingPadInfo *LP = LandingPads[i];
- const std::vector<int> &TypeIds = LP->TypeIds;
- const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
- unsigned SizeSiteActions = 0;
-
- if (NumShared < TypeIds.size()) {
- unsigned SizeAction = 0;
- ActionEntry *PrevAction = 0;
-
- if (NumShared) {
- const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
- assert(Actions.size());
- PrevAction = &Actions.back();
- SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
- MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
- for (unsigned j = NumShared; j != SizePrevIds; ++j) {
- SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
- SizeAction += -PrevAction->NextAction;
- PrevAction = PrevAction->Previous;
- }
- }
-
- // Compute the actions.
- for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
- int TypeID = TypeIds[I];
- assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
- int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
- unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
-
- int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
- SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
- SizeSiteActions += SizeAction;
-
- ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
- Actions.push_back(Action);
-
- PrevAction = &Actions.back();
- }
-
- // Record the first action of the landing pad site.
- FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
- } // else identical - re-use previous FirstAction
-
- FirstActions.push_back(FirstAction);
-
- // Compute this sites contribution to size.
- SizeActions += SizeSiteActions;
- }
-
- // Compute the call-site table. Entries must be ordered by address.
- SmallVector<CallSiteEntry, 64> CallSites;
-
- RangeMapType PadMap;
- for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
- const LandingPadInfo *LandingPad = LandingPads[i];
- for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
- MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
- assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
- PadRange P = { i, j };
- PadMap[BeginLabel] = P;
- }
- }
-
- bool MayThrow = false;
- MCSymbol *LastLabel = 0;
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
- I != E; ++I) {
- for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
- MI != E; ++MI) {
- if (!MI->isLabel()) {
- MayThrow |= MI->getDesc().isCall();
- continue;
- }
-
- MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
-
- if (BeginLabel == LastLabel)
- MayThrow = false;
-
- RangeMapType::iterator L = PadMap.find(BeginLabel);
-
- if (L == PadMap.end())
- continue;
-
- PadRange P = L->second;
- const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
-
- assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
- "Inconsistent landing pad map!");
-
- // If some instruction between the previous try-range and this one may
- // throw, create a call-site entry with no landing pad for the region
- // between the try-ranges.
- if (MayThrow) {
- CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
- CallSites.push_back(Site);
- }
-
- LastLabel = LandingPad->EndLabels[P.RangeIndex];
- CallSiteEntry Site = {BeginLabel, LastLabel,
- LandingPad->LandingPadLabel, FirstActions[P.PadIndex]};
-
- assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel &&
- "Invalid landing pad!");
-
- // Try to merge with the previous call-site.
- if (CallSites.size()) {
- CallSiteEntry &Prev = CallSites.back();
- if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
- // Extend the range of the previous entry.
- Prev.EndLabel = Site.EndLabel;
- continue;
- }
- }
-
- // Otherwise, create a new call-site.
- CallSites.push_back(Site);
- }
- }
- // If some instruction between the previous try-range and the end of the
- // function may throw, create a call-site entry with no landing pad for the
- // region following the try-range.
- if (MayThrow) {
- CallSiteEntry Site = {LastLabel, 0, 0, 0};
- CallSites.push_back(Site);
- }
-
- // Final tallies.
- unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start.
- sizeof(int32_t) + // Site length.
- sizeof(int32_t)); // Landing pad.
- for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
- SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
-
- unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
-
- unsigned TypeOffset = sizeof(int8_t) + // Call site format
- // Call-site table length
- MCAsmInfo::getULEB128Size(SizeSites) +
- SizeSites + SizeActions + SizeTypes;
-
- unsigned TotalSize = sizeof(int8_t) + // LPStart format
- sizeof(int8_t) + // TType format
- MCAsmInfo::getULEB128Size(TypeOffset) + // TType base offset
- TypeOffset;
-
- unsigned SizeAlign = (4 - TotalSize) & 3;
-
- // Begin the exception table.
- FinalSize = RoundUpToAlign(FinalSize, 4);
- for (unsigned i = 0; i != SizeAlign; ++i) {
- ++FinalSize;
- }
-
- unsigned PointerSize = TD->getPointerSize();
-
- // Emit the header.
- ++FinalSize;
- // Asm->EOL("LPStart format (DW_EH_PE_omit)");
- ++FinalSize;
- // Asm->EOL("TType format (DW_EH_PE_absptr)");
- ++FinalSize;
- // Asm->EOL("TType base offset");
- ++FinalSize;
- // Asm->EOL("Call site format (DW_EH_PE_udata4)");
- ++FinalSize;
- // Asm->EOL("Call-site table length");
-
- // Emit the landing pad site information.
- for (unsigned i = 0; i < CallSites.size(); ++i) {
- CallSiteEntry &S = CallSites[i];
-
- // Asm->EOL("Region start");
- FinalSize += PointerSize;
-
- //Asm->EOL("Region length");
- FinalSize += PointerSize;
-
- // Asm->EOL("Landing pad");
- FinalSize += PointerSize;
-
- FinalSize += MCAsmInfo::getULEB128Size(S.Action);
- // Asm->EOL("Action");
- }
-
- // Emit the actions.
- for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
- ActionEntry &Action = Actions[I];
-
- //Asm->EOL("TypeInfo index");
- FinalSize += MCAsmInfo::getSLEB128Size(Action.ValueForTypeID);
- //Asm->EOL("Next action");
- FinalSize += MCAsmInfo::getSLEB128Size(Action.NextAction);
- }
-
- // Emit the type ids.
- for (unsigned M = TypeInfos.size(); M; --M) {
- // Asm->EOL("TypeInfo");
- FinalSize += PointerSize;
- }
-
- // Emit the filter typeids.
- for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
- unsigned TypeID = FilterIds[j];
- FinalSize += MCAsmInfo::getULEB128Size(TypeID);
- //Asm->EOL("Filter TypeInfo index");
- }
-
- FinalSize = RoundUpToAlign(FinalSize, 4);
-
- return FinalSize;
-}
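
This removed estimator mirrors, entry for entry, the call-site table the emitter later writes; in particular it reproduces the rule that adjacent try-ranges sharing a landing pad and action merge into a single call site. A condensed sketch of that merge rule, with simplified stand-ins for the real CallSiteEntry:

    #include <vector>

    struct CallSite {
      unsigned Begin, End;  // Try-range boundaries (label ids, simplified).
      unsigned Pad, Action; // Landing pad and first action index.
    };

    // Append Site, extending the previous entry instead when both dispatch
    // to the same landing pad with the same action.
    static void AddCallSite(std::vector<CallSite> &Sites, CallSite Site) {
      if (!Sites.empty()) {
        CallSite &Prev = Sites.back();
        if (Site.Pad == Prev.Pad && Site.Action == Prev.Action) {
          Prev.End = Site.End; // Merge: grow the previous try-range.
          return;
        }
      }
      Sites.push_back(Site);
    }
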
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
index e627550d6d0e6..30956820f357e 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -49,17 +49,6 @@ class JITDwarfEmitter {
unsigned char* EndFunction,
unsigned char* ExceptionTable) const;
- unsigned GetExceptionTableSizeInBytes(MachineFunction* MF) const;
-
- unsigned
- GetFrameMovesSizeInBytes(intptr_t BaseLabelPtr,
- const std::vector<MachineMove> &Moves) const;
-
- unsigned GetCommonEHFrameSizeInBytes(const Function* Personality) const;
-
- unsigned GetEHFrameSizeInBytes(const Function* Personality,
- unsigned char* StartFunction) const;
-
public:
JITDwarfEmitter(JIT& jit);
@@ -71,11 +60,6 @@ public:
unsigned char* &EHFramePtr);
- unsigned GetDwarfTableSizeInBytes(MachineFunction& F,
- JITCodeEmitter& JCE,
- unsigned char* StartFunction,
- unsigned char* EndFunction);
-
void setModuleInfo(MachineModuleInfo* Info) {
MMI = Info;
}
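
The declarations removed here correspond to the size estimators above, all of which measure variable-length DWARF fields via MCAsmInfo::getULEB128Size and getSLEB128Size. A minimal sketch of the unsigned variant, assuming the standard LEB128 encoding of 7 payload bits per encoded byte:

    #include <cstdint>

    // A ULEB128-encoded value takes one byte per 7 bits of payload, with a
    // minimum of one byte for the value zero.
    static unsigned ULEB128Size(uint64_t Value) {
      unsigned Size = 0;
      do {
        Value >>= 7; // Each encoded byte consumes 7 bits of payload.
        ++Size;
      } while (Value != 0);
      return Size;
    }

Values 0-127 encode in one byte and 128-16383 in two, which is why the table length fields above are sized only after the tables themselves have been measured.
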
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 28d79daed350b..4c0d0789cced4 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -152,16 +152,6 @@ namespace {
FunctionToCallSitesMap[F].insert(CallSite);
}
- // Returns the Function of the stub if a stub was erased, or NULL if there
- // was no stub. This function uses the call-site->function map to find a
- // relevant function, but asserts that only stubs and not other call sites
- // will be passed in.
- Function *EraseStub(const MutexGuard &locked, void *Stub);
-
- void EraseAllCallSitesFor(const MutexGuard &locked, Function *F) {
- assert(locked.holds(TheJIT->lock));
- EraseAllCallSitesForPrelocked(F);
- }
void EraseAllCallSitesForPrelocked(Function *F);
// Erases _all_ call sites regardless of their function. This is used to
@@ -223,9 +213,6 @@ namespace {
/// specified GV address.
void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
- void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
- SmallVectorImpl<void*> &Ptrs);
-
/// getGOTIndexForAddress - Return a new or existing index in the GOT for
/// an address. This function only manages slots, it does not manage the
/// contents of the slots or the memory associated with the GOT.
@@ -398,7 +385,6 @@ namespace {
/// classof - Methods for support type inquiry through isa, cast, and
/// dyn_cast:
///
- static inline bool classof(const JITEmitter*) { return true; }
static inline bool classof(const MachineCodeEmitter*) { return true; }
JITResolver &getJITResolver() { return Resolver; }
@@ -480,26 +466,10 @@ namespace {
if (DE.get()) DE->setModuleInfo(Info);
}
- void setMemoryExecutable() {
- MemMgr->setMemoryExecutable();
- }
-
- JITMemoryManager *getMemMgr() const { return MemMgr; }
-
private:
void *getPointerToGlobal(GlobalValue *GV, void *Reference,
bool MayNeedFarStub);
void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
- unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
- unsigned addSizeOfGlobalsInConstantVal(
- const Constant *C, unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist);
- unsigned addSizeOfGlobalsInInitializer(
- const Constant *Init, unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist);
- unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
};
}
@@ -507,39 +477,6 @@ void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
JRS->EraseAllCallSitesForPrelocked(F);
}
-Function *JITResolverState::EraseStub(const MutexGuard &locked, void *Stub) {
- CallSiteToFunctionMapTy::iterator C2F_I =
- CallSiteToFunctionMap.find(Stub);
- if (C2F_I == CallSiteToFunctionMap.end()) {
- // Not a stub.
- return NULL;
- }
-
- StubToResolverMap->UnregisterStubResolver(Stub);
-
- Function *const F = C2F_I->second;
-#ifndef NDEBUG
- void *RealStub = FunctionToLazyStubMap.lookup(F);
- assert(RealStub == Stub &&
- "Call-site that wasn't a stub passed in to EraseStub");
-#endif
- FunctionToLazyStubMap.erase(F);
- CallSiteToFunctionMap.erase(C2F_I);
-
- // Remove the stub from the function->call-sites map, and remove the whole
- // entry from the map if that was the last call site.
- FunctionToCallSitesMapTy::iterator F2C_I = FunctionToCallSitesMap.find(F);
- assert(F2C_I != FunctionToCallSitesMap.end() &&
- "FunctionToCallSitesMap broken");
- bool Erased = F2C_I->second.erase(Stub);
- (void)Erased;
- assert(Erased && "FunctionToCallSitesMap broken");
- if (F2C_I->second.empty())
- FunctionToCallSitesMap.erase(F2C_I);
-
- return F;
-}
-
void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) {
FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
if (F2C == FunctionToCallSitesMap.end())
@@ -690,28 +627,6 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) {
return idx;
}
-void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
- SmallVectorImpl<void*> &Ptrs) {
- MutexGuard locked(TheJIT->lock);
-
- const FunctionToLazyStubMapTy &FM = state.getFunctionToLazyStubMap(locked);
- GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked);
-
- for (FunctionToLazyStubMapTy::const_iterator i = FM.begin(), e = FM.end();
- i != e; ++i){
- Function *F = i->first;
- if (F->isDeclaration() && F->hasExternalLinkage()) {
- GVs.push_back(i->first);
- Ptrs.push_back(i->second);
- }
- }
- for (GlobalToIndirectSymMapTy::iterator i = GM.begin(), e = GM.end();
- i != e; ++i) {
- GVs.push_back(i->first);
- Ptrs.push_back(i->second);
- }
-}
-
/// JITCompilerFn - This function is called when a lazy compilation stub has
/// been entered. It looks up which function this stub corresponds to, compiles
/// it if necessary, then returns the resultant function pointer.
@@ -831,7 +746,7 @@ void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
if (DL.isUnknown()) return;
if (!BeforePrintingInsn) return;
- const LLVMContext& Context = EmissionDetails.MF->getFunction()->getContext();
+ const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext();
if (DL.getScope(Context) != 0 && PrevDL != DL) {
JITEvent_EmittedFunctionDetails::LineStart NextLine;
@@ -859,184 +774,6 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
return Size;
}
-static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI, JIT *jit) {
- const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- if (JT.empty()) return 0;
-
- unsigned NumEntries = 0;
- for (unsigned i = 0, e = JT.size(); i != e; ++i)
- NumEntries += JT[i].MBBs.size();
-
- return NumEntries * MJTI->getEntrySize(*jit->getTargetData());
-}
-
-static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
- if (Alignment == 0) Alignment = 1;
- // Since we do not know where the buffer will be allocated, be pessimistic.
- return Size + Alignment;
-}
-
-/// addSizeOfGlobal - add the size of the global (plus any alignment padding)
-/// into the running total Size.
-
-unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
- const Type *ElTy = GV->getType()->getElementType();
- size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy);
- size_t GVAlign =
- (size_t)TheJIT->getTargetData()->getPreferredAlignment(GV);
- DEBUG(dbgs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign);
- DEBUG(GV->dump());
- // Assume code section ends with worst possible alignment, so first
- // variable needs maximal padding.
- if (Size==0)
- Size = 1;
- Size = ((Size+GVAlign-1)/GVAlign)*GVAlign;
- Size += GVSize;
- return Size;
-}
-
-/// addSizeOfGlobalsInConstantVal - find any globals that we haven't seen yet
-/// but are referenced from the constant; put them in SeenGlobals and the
-/// Worklist, and add their size into the running total Size.
-
-unsigned JITEmitter::addSizeOfGlobalsInConstantVal(
- const Constant *C,
- unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist) {
- // If it's undefined, return the garbage.
- if (isa<UndefValue>(C))
- return Size;
-
- // If the value is a ConstantExpr
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- Constant *Op0 = CE->getOperand(0);
- switch (CE->getOpcode()) {
- case Instruction::GetElementPtr:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast: {
- Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
- break;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor: {
- Size = addSizeOfGlobalsInConstantVal(Op0, Size, SeenGlobals, Worklist);
- Size = addSizeOfGlobalsInConstantVal(CE->getOperand(1), Size,
- SeenGlobals, Worklist);
- break;
- }
- default: {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "ConstantExpr not handled: " << *CE;
- report_fatal_error(Msg.str());
- }
- }
- }
-
- if (C->getType()->getTypeID() == Type::PointerTyID)
- if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(C))
- if (SeenGlobals.insert(GV)) {
- Worklist.push_back(GV);
- Size = addSizeOfGlobal(GV, Size);
- }
-
- return Size;
-}
-
-/// addSizeOfGlobalsInInitializer - handle any globals that we haven't seen yet
-/// but are referenced from the given initializer.
-
-unsigned JITEmitter::addSizeOfGlobalsInInitializer(
- const Constant *Init,
- unsigned Size,
- SmallPtrSet<const GlobalVariable*, 8> &SeenGlobals,
- SmallVectorImpl<const GlobalVariable*> &Worklist) {
- if (!isa<UndefValue>(Init) &&
- !isa<ConstantVector>(Init) &&
- !isa<ConstantAggregateZero>(Init) &&
- !isa<ConstantArray>(Init) &&
- !isa<ConstantStruct>(Init) &&
- Init->getType()->isFirstClassType())
- Size = addSizeOfGlobalsInConstantVal(Init, Size, SeenGlobals, Worklist);
- return Size;
-}
-
-/// GetSizeOfGlobalsInBytes - walk the code for the function, looking for
-/// globals; then walk the initializers of those globals looking for more.
-/// If their size has not been considered yet, add it into the running total
-/// Size.
-
-unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
- unsigned Size = 0;
- SmallPtrSet<const GlobalVariable*, 8> SeenGlobals;
-
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
- MBB != E; ++MBB) {
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const TargetInstrDesc &Desc = I->getDesc();
- const MachineInstr &MI = *I;
- unsigned NumOps = Desc.getNumOperands();
- for (unsigned CurOp = 0; CurOp < NumOps; CurOp++) {
- const MachineOperand &MO = MI.getOperand(CurOp);
- if (MO.isGlobal()) {
- const GlobalValue* V = MO.getGlobal();
- const GlobalVariable *GV = dyn_cast<const GlobalVariable>(V);
- if (!GV)
- continue;
- // If seen in previous function, it will have an entry here.
- if (TheJIT->getPointerToGlobalIfAvailable(
- const_cast<GlobalVariable *>(GV)))
- continue;
- // If seen earlier in this function, it will have an entry here.
- // FIXME: it should be possible to combine these tables, by
- // assuming the addresses of the new globals in this module
- // start at 0 (or something) and adjusting them after codegen
- // complete. Another possibility is to grab a marker bit in GV.
- if (SeenGlobals.insert(GV))
- // A variable as yet unseen. Add in its size.
- Size = addSizeOfGlobal(GV, Size);
- }
- }
- }
- }
- DEBUG(dbgs() << "JIT: About to look through initializers\n");
- // Look for more globals that are referenced only from initializers.
- SmallVector<const GlobalVariable*, 8> Worklist(
- SeenGlobals.begin(), SeenGlobals.end());
- while (!Worklist.empty()) {
- const GlobalVariable* GV = Worklist.back();
- Worklist.pop_back();
- if (GV->hasInitializer())
- Size = addSizeOfGlobalsInInitializer(GV->getInitializer(), Size,
- SeenGlobals, Worklist);
- }
-
- return Size;
-}
-
void JITEmitter::startFunction(MachineFunction &F) {
DEBUG(dbgs() << "JIT: Starting CodeGen of Function "
<< F.getFunction()->getName() << "\n");
@@ -1044,43 +781,8 @@ void JITEmitter::startFunction(MachineFunction &F) {
uintptr_t ActualSize = 0;
// Set the memory writable, if it's not already
MemMgr->setMemoryWritable();
- if (MemMgr->NeedsExactSize()) {
- DEBUG(dbgs() << "JIT: ExactSize\n");
- const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
- MachineConstantPool *MCP = F.getConstantPool();
-
- // Ensure the constant pool/jump table info is at least 4-byte aligned.
- ActualSize = RoundUpToAlign(ActualSize, 16);
-
- // Add the alignment of the constant pool
- ActualSize = RoundUpToAlign(ActualSize, MCP->getConstantPoolAlignment());
-
- // Add the constant pool size
- ActualSize += GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
-
- if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) {
- // Add the alignment of the jump table info
- ActualSize = RoundUpToAlign(ActualSize,
- MJTI->getEntryAlignment(*TheJIT->getTargetData()));
-
- // Add the jump table size
- ActualSize += GetJumpTableSizeInBytes(MJTI, TheJIT);
- }
-
- // Add the alignment for the function
- ActualSize = RoundUpToAlign(ActualSize,
- std::max(F.getFunction()->getAlignment(), 8U));
-
- // Add the function size
- ActualSize += TII->GetFunctionSizeInBytes(F);
-
- DEBUG(dbgs() << "JIT: ActualSize before globals " << ActualSize << "\n");
- // Add the size of the globals that will be allocated after this function.
- // These are all the ones referenced from this function that were not
- // previously allocated.
- ActualSize += GetSizeOfGlobalsInBytes(F);
- DEBUG(dbgs() << "JIT: ActualSize after globals " << ActualSize << "\n");
- } else if (SizeEstimate > 0) {
+
+ if (SizeEstimate > 0) {
// SizeEstimate will be non-zero on reallocation attempts.
ActualSize = SizeEstimate;
}
@@ -1268,9 +970,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
- if (MemMgr->NeedsExactSize())
- ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd);
-
BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(),
ActualSize);
BufferEnd = BufferBegin+ActualSize;
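
The exact-size path deleted above also shows why the JIT's RoundUpToAlign was deliberately pessimistic: at estimation time the base address of the allocation is unknown, so instead of computing exact padding it reserves a full extra Alignment bytes. A sketch contrasting the two forms, assuming a power-of-two alignment for the exact variant:

    #include <cstdint>

    // Exact round-up: valid once the base offset is known and Alignment is
    // a power of two.
    static uintptr_t RoundUpExact(uintptr_t Size, uintptr_t Alignment) {
      return (Size + Alignment - 1) & ~(Alignment - 1);
    }

    // Pessimistic reservation: with an unknown base, anywhere from 0 to
    // Alignment-1 padding bytes may be needed, so reserve Alignment of them.
    static uintptr_t RoundUpPessimistic(uintptr_t Size, uintptr_t Alignment) {
      if (Alignment == 0) Alignment = 1;
      return Size + Alignment;
    }

Over-reserving is harmless for a size estimate, since the emitter still places each object at its exact aligned offset within the buffer.
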
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 8487c83ce36ab..7e8245a9e3a6b 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/ADT/DenseMap.h"
using namespace llvm;
@@ -96,15 +97,6 @@ public:
return 0;
}
- /// erase - Remove the specified type, returning true if it was in the set.
- bool erase(const Type *Ty) {
- if (!TheMap.erase(Ty))
- return false;
- if (Ty->isAbstract())
- Ty->removeAbstractTypeUser(this);
- return true;
- }
-
/// insert - This returns true if the pointer was new to the set, false if it
/// was already in the set.
bool insert(const Type *Src, const Type *Dst) {
@@ -334,97 +326,6 @@ static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) {
return false;
}
-#ifndef NDEBUG
-static void PrintMap(const std::map<const Value*, Value*> &M) {
- for (std::map<const Value*, Value*>::const_iterator I = M.begin(), E =M.end();
- I != E; ++I) {
- dbgs() << " Fr: " << (void*)I->first << " ";
- I->first->dump();
- dbgs() << " To: " << (void*)I->second << " ";
- I->second->dump();
- dbgs() << "\n";
- }
-}
-#endif
-
-
-// RemapOperand - Use ValueMap to convert constants from one module to another.
-static Value *RemapOperand(const Value *In,
- std::map<const Value*, Value*> &ValueMap) {
- std::map<const Value*,Value*>::const_iterator I = ValueMap.find(In);
- if (I != ValueMap.end())
- return I->second;
-
- // Check to see if it's a constant that we are interested in transforming.
- Value *Result = 0;
- if (const Constant *CPV = dyn_cast<Constant>(In)) {
- if ((!isa<DerivedType>(CPV->getType()) && !isa<ConstantExpr>(CPV)) ||
- isa<ConstantInt>(CPV) || isa<ConstantAggregateZero>(CPV))
- return const_cast<Constant*>(CPV); // Simple constants stay identical.
-
- if (const ConstantArray *CPA = dyn_cast<ConstantArray>(CPV)) {
- std::vector<Constant*> Operands(CPA->getNumOperands());
- for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i)
- Operands[i] =cast<Constant>(RemapOperand(CPA->getOperand(i), ValueMap));
- Result = ConstantArray::get(cast<ArrayType>(CPA->getType()), Operands);
- } else if (const ConstantStruct *CPS = dyn_cast<ConstantStruct>(CPV)) {
- std::vector<Constant*> Operands(CPS->getNumOperands());
- for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i)
- Operands[i] =cast<Constant>(RemapOperand(CPS->getOperand(i), ValueMap));
- Result = ConstantStruct::get(cast<StructType>(CPS->getType()), Operands);
- } else if (isa<ConstantPointerNull>(CPV) || isa<UndefValue>(CPV)) {
- Result = const_cast<Constant*>(CPV);
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CPV)) {
- std::vector<Constant*> Operands(CP->getNumOperands());
- for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i)
- Operands[i] = cast<Constant>(RemapOperand(CP->getOperand(i), ValueMap));
- Result = ConstantVector::get(Operands);
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
- std::vector<Constant*> Ops;
- for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i)
- Ops.push_back(cast<Constant>(RemapOperand(CE->getOperand(i),ValueMap)));
- Result = CE->getWithOperands(Ops);
- } else if (const BlockAddress *CE = dyn_cast<BlockAddress>(CPV)) {
- Result = BlockAddress::get(
- cast<Function>(RemapOperand(CE->getFunction(), ValueMap)),
- CE->getBasicBlock());
- } else {
- assert(!isa<GlobalValue>(CPV) && "Unmapped global?");
- llvm_unreachable("Unknown type of derived type constant value!");
- }
- } else if (const MDNode *MD = dyn_cast<MDNode>(In)) {
- if (MD->isFunctionLocal()) {
- SmallVector<Value*, 4> Elts;
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
- if (MD->getOperand(i))
- Elts.push_back(RemapOperand(MD->getOperand(i), ValueMap));
- else
- Elts.push_back(NULL);
- }
- Result = MDNode::get(In->getContext(), Elts.data(), MD->getNumOperands());
- } else {
- Result = const_cast<Value*>(In);
- }
- } else if (isa<MDString>(In) || isa<InlineAsm>(In) || isa<Instruction>(In)) {
- Result = const_cast<Value*>(In);
- }
-
- // Cache the mapping in our local map structure
- if (Result) {
- ValueMap[In] = Result;
- return Result;
- }
-
-#ifndef NDEBUG
- dbgs() << "LinkModules ValueMap: \n";
- PrintMap(ValueMap);
-
- dbgs() << "Couldn't remap value: " << (void*)In << " " << *In << "\n";
- llvm_unreachable("Couldn't remap value!");
-#endif
- return 0;
-}
-
/// ForceRenaming - The LLVM SymbolTable class autorenames globals that conflict
/// in the symbol table. This is good for all clients except for us. Go
/// through the trouble to force this back.
@@ -541,25 +442,24 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
}
// Insert all of the named mdnodes in Src into the Dest module.
-static void LinkNamedMDNodes(Module *Dest, Module *Src) {
+static void LinkNamedMDNodes(Module *Dest, Module *Src,
+ ValueToValueMapTy &ValueMap) {
for (Module::const_named_metadata_iterator I = Src->named_metadata_begin(),
E = Src->named_metadata_end(); I != E; ++I) {
const NamedMDNode *SrcNMD = I;
- NamedMDNode *DestNMD = Dest->getNamedMetadata(SrcNMD->getName());
- if (!DestNMD)
- NamedMDNode::Create(SrcNMD, Dest);
- else {
- // Add Src elements into Dest node.
- for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
- DestNMD->addOperand(SrcNMD->getOperand(i));
- }
+ NamedMDNode *DestNMD = Dest->getOrInsertNamedMetadata(SrcNMD->getName());
+ // Add Src elements into Dest node.
+ for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
+ DestNMD->addOperand(cast<MDNode>(MapValue(SrcNMD->getOperand(i),
+ ValueMap,
+ true)));
}
}
// LinkGlobals - Loop through the global variables in the src module and merge
// them into the dest module.
static bool LinkGlobals(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::multimap<std::string, GlobalVariable *> &AppendingVars,
std::string *Err) {
ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
@@ -735,6 +635,12 @@ CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
else if (SL == GlobalValue::LinkerPrivateLinkage &&
DL == GlobalValue::LinkerPrivateLinkage)
return GlobalValue::LinkerPrivateLinkage;
+ else if (SL == GlobalValue::LinkerPrivateWeakLinkage &&
+ DL == GlobalValue::LinkerPrivateWeakLinkage)
+ return GlobalValue::LinkerPrivateWeakLinkage;
+ else if (SL == GlobalValue::LinkerPrivateWeakDefAutoLinkage &&
+ DL == GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+ return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
else {
assert (SL == GlobalValue::PrivateLinkage &&
DL == GlobalValue::PrivateLinkage && "Unexpected linkage type");
@@ -746,7 +652,7 @@ CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
// dest module. We're assuming that all functions/global variables were already
// linked in.
static bool LinkAlias(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
// Loop over all aliases in the src module
for (Module::const_alias_iterator I = Src->alias_begin(),
@@ -757,7 +663,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
// Globals were already linked, thus we can just query ValueMap for the variant
// of SAliasee in Dest.
- std::map<const Value*,Value*>::const_iterator VMI = ValueMap.find(SAliasee);
+ ValueToValueMapTy::const_iterator VMI = ValueMap.find(SAliasee);
assert(VMI != ValueMap.end() && "Aliasee not linked");
GlobalValue* DAliasee = cast<GlobalValue>(VMI->second);
GlobalValue* DGV = NULL;
@@ -888,7 +794,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
ForceRenaming(NewGA, SGA->getName());
// Remember this mapping so uses in the source module get remapped
- // later by RemapOperand.
+ // later by MapValue.
ValueMap[SGA] = NewGA;
}
@@ -899,7 +805,7 @@ static bool LinkAlias(Module *Dest, const Module *Src,
// LinkGlobalInits - Update the initializers in the Dest module now that all
// globals that may be referenced are in Dest.
static bool LinkGlobalInits(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
// Loop over all of the globals in the src module, mapping them over as we go
for (Module::const_global_iterator I = Src->global_begin(),
@@ -909,7 +815,7 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
if (SGV->hasInitializer()) { // Only process initialized GV's
// Figure out what the initializer looks like in the dest module...
Constant *SInit =
- cast<Constant>(RemapOperand(SGV->getInitializer(), ValueMap));
+ cast<Constant>(MapValue(SGV->getInitializer(), ValueMap, true));
// Grab destination global variable or alias.
GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts());
@@ -954,7 +860,7 @@ static bool LinkGlobalInits(Module *Dest, const Module *Src,
// to the Dest function...
//
static bool LinkFunctionProtos(Module *Dest, const Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
@@ -1039,7 +945,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src,
ForceRenaming(NewDF, SF->getName());
// Remember this mapping so uses in the source module get remapped
- // later by RemapOperand.
+ // later by MapValue.
ValueMap[SF] = NewDF;
continue;
}
@@ -1069,7 +975,7 @@ static bool LinkFunctionProtos(Module *Dest, const Module *Src,
// fix up references to values. At this point we know that Dest is an external
// function, and that Src is not.
static bool LinkFunctionBody(Function *Dest, Function *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
assert(Src && Dest && Dest->isDeclaration() && !Src->isDeclaration());
@@ -1091,12 +997,30 @@ static bool LinkFunctionBody(Function *Dest, Function *Src,
// the Source function as operands. Loop through all of the operands of the
// functions and patch them up to point to the local versions...
//
+ // This is the same as RemapInstruction, except that it avoids remapping
+ // instruction and basic block operands.
+ //
for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ // Remap operands.
for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
OI != OE; ++OI)
if (!isa<Instruction>(*OI) && !isa<BasicBlock>(*OI))
- *OI = RemapOperand(*OI, ValueMap);
+ *OI = MapValue(*OI, ValueMap, true);
+
+ // Remap attached metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadata(MDs);
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ Value *Old = MI->second;
+ if (!isa<Instruction>(Old) && !isa<BasicBlock>(Old)) {
+ Value *New = MapValue(Old, ValueMap, true);
+ if (New != Old)
+ I->setMetadata(MI->first, cast<MDNode>(New));
+ }
+ }
+ }
// There is no need to map the arguments anymore.
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
@@ -1111,7 +1035,7 @@ static bool LinkFunctionBody(Function *Dest, Function *Src,
// source module into the DestModule. This consists basically of copying the
// function over and fixing up references to values.
static bool LinkFunctionBodies(Module *Dest, Module *Src,
- std::map<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &ValueMap,
std::string *Err) {
// Loop over all of the functions in the src module, mapping them over as we
@@ -1319,8 +1243,10 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
return true;
// ValueMap - Mapping of values from what they used to be in Src, to what they
- // are now in Dest.
- std::map<const Value*, Value*> ValueMap;
+ // are now in Dest. ValueToValueMapTy is a ValueMap, which involves some
+ // overhead due to the use of Value handles which the Linker doesn't actually
+ // need, but this allows us to reuse the ValueMapper code.
+ ValueToValueMapTy ValueMap;
// AppendingVars - Keep track of global variables in the destination module
// with appending linkage. After the module is linked together, they are
@@ -1334,9 +1260,6 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
AppendingVars.insert(std::make_pair(I->getName(), I));
}
- // Insert all of the named mdnodes in Src into the Dest module.
- LinkNamedMDNodes(Dest, Src);
-
// Insert all of the globals in src into the Dest module... without linking
// initializers (which could refer to functions not yet mapped over).
if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
@@ -1370,6 +1293,11 @@ Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
// Resolve all uses of aliases with aliasees
if (ResolveAliases(Dest)) return true;
+ // Remap all of the named mdnodes in Src into the Dest module. We do this
+ // after linking GlobalValues so that MDNodes that reference GlobalValues
+ // are properly remapped.
+ LinkNamedMDNodes(Dest, Src, ValueMap);
+
// If the source library's module id is in the dependent library list of the
// destination library, remove it since that module is now linked in.
sys::Path modId;
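
The switch from the hand-rolled RemapOperand to MapValue keeps the same core contract: values already usable in the destination module map to themselves, and every computed mapping is memoized in the value map so each source value is translated once and reused everywhere. A simplified sketch of that contract, with illustrative names rather than the real ValueMapper API:

    #include <map>

    struct Value; // Stand-in for llvm::Value.

    // Translate In into the destination module, consulting and updating the
    // cache so repeated uses of one source value share one translation.
    static Value *RemapSketch(Value *In, std::map<Value *, Value *> &VM,
                              Value *(*Translate)(Value *)) {
      std::map<Value *, Value *>::iterator I = VM.find(In);
      if (I != VM.end())
        return I->second;         // Cached: reuse the earlier translation.
      Value *Out = Translate(In); // Otherwise translate it once...
      VM[In] = Out;               // ...and memoize the result.
      return Out;
    }
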
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index fc4f3c69482ac..60a3a3e3e3128 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_library(LLVMMC
+ ELFObjectWriter.cpp
MCAsmInfo.cpp
MCAsmInfoCOFF.cpp
MCAsmInfoDarwin.cpp
@@ -7,10 +8,12 @@ add_llvm_library(LLVMMC
MCCodeEmitter.cpp
MCContext.cpp
MCDisassembler.cpp
+ MCELFStreamer.cpp
MCExpr.cpp
MCInst.cpp
MCInstPrinter.cpp
MCLabel.cpp
+ MCDwarf.cpp
MCLoggingStreamer.cpp
MCMachOStreamer.cpp
MCNullStreamer.cpp
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
new file mode 100644
index 0000000000000..cf35b45715e1e
--- /dev/null
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -0,0 +1,973 @@
+//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements ELF object file writer information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/ELFObjectWriter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+
+#include <vector>
+using namespace llvm;
+
+namespace {
+
+ class ELFObjectWriterImpl {
+ static bool isFixupKindX86PCRel(unsigned Kind) {
+ switch (Kind) {
+ default:
+ return false;
+ case X86::reloc_pcrel_1byte:
+ case X86::reloc_pcrel_4byte:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return true;
+ }
+ }
+
+ /*static bool isFixupKindX86RIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+ }*/
+
+
+ /// ELFSymbolData - Helper struct for containing some precomputed information
+ /// on symbols.
+ struct ELFSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint32_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const ELFSymbolData &RHS) const {
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+ }
+ };
+
+ /// @name Relocation Data
+ /// @{
+
+ struct ELFRelocationEntry {
+ // Make these big enough for both 32-bit and 64-bit
+ uint64_t r_offset;
+ uint64_t r_info;
+ uint64_t r_addend;
+
+ // Sort by descending r_offset; the writer emits entries in reverse order.
+ bool operator<(const ELFRelocationEntry &RE) const {
+ return RE.r_offset < r_offset;
+ }
+ };
+
+ llvm::DenseMap<const MCSectionData*,
+ std::vector<ELFRelocationEntry> > Relocations;
+ DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+
+ /// @}
+ /// @name Symbol Table Data
+ /// @{
+
+ SmallString<256> StringTable;
+ std::vector<ELFSymbolData> LocalSymbolData;
+ std::vector<ELFSymbolData> ExternalSymbolData;
+ std::vector<ELFSymbolData> UndefinedSymbolData;
+
+ /// @}
+
+ ELFObjectWriter *Writer;
+
+ raw_ostream &OS;
+
+ // This holds the current offset into the object file.
+ size_t FileOff;
+
+ unsigned Is64Bit : 1;
+
+ bool HasRelocationAddend;
+
+ // This holds the symbol table index of the last local symbol.
+ unsigned LastLocalSymbolIndex;
+ // This holds the .strtab section index.
+ unsigned StringTableIndex;
+
+ unsigned ShstrtabIndex;
+
+ public:
+ ELFObjectWriterImpl(ELFObjectWriter *_Writer, bool _Is64Bit,
+ bool _HasRelAddend)
+ : Writer(_Writer), OS(Writer->getStream()), FileOff(0),
+ Is64Bit(_Is64Bit), HasRelocationAddend(_HasRelAddend) {
+ }
+
+ void Write8(uint8_t Value) { Writer->Write8(Value); }
+ void Write16(uint16_t Value) { Writer->Write16(Value); }
+ void Write32(uint32_t Value) { Writer->Write32(Value); }
+ //void Write64(uint64_t Value) { Writer->Write64(Value); }
+ void WriteZeros(unsigned N) { Writer->WriteZeros(N); }
+ //void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) {
+ // Writer->WriteBytes(Str, ZeroFillSize);
+ //}
+
+ void WriteWord(uint64_t W) {
+ if (Is64Bit)
+ Writer->Write64(W);
+ else
+ Writer->Write32(W);
+ }
+
+ void String8(char *buf, uint8_t Value) {
+ buf[0] = Value;
+ }
+
+ void StringLE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 0);
+ buf[1] = char(Value >> 8);
+ }
+
+ void StringLE32(char *buf, uint32_t Value) {
+ StringLE16(buf, uint16_t(Value >> 0));
+ StringLE16(buf + 2, uint16_t(Value >> 16));
+ }
+
+ void StringLE64(char *buf, uint64_t Value) {
+ StringLE32(buf, uint32_t(Value >> 0));
+ StringLE32(buf + 4, uint32_t(Value >> 32));
+ }
+
+ void StringBE16(char *buf, uint16_t Value) {
+ buf[0] = char(Value >> 8);
+ buf[1] = char(Value >> 0);
+ }
+
+ void StringBE32(char *buf, uint32_t Value) {
+ StringBE16(buf, uint16_t(Value >> 16));
+ StringBE16(buf + 2, uint16_t(Value >> 0));
+ }
+
+ void StringBE64(char *buf, uint64_t Value) {
+ StringBE32(buf, uint32_t(Value >> 32));
+ StringBE32(buf + 4, uint32_t(Value >> 0));
+ }
+
+ void String16(char *buf, uint16_t Value) {
+ if (Writer->isLittleEndian())
+ StringLE16(buf, Value);
+ else
+ StringBE16(buf, Value);
+ }
+
+ void String32(char *buf, uint32_t Value) {
+ if (Writer->isLittleEndian())
+ StringLE32(buf, Value);
+ else
+ StringBE32(buf, Value);
+ }
+
+ void String64(char *buf, uint64_t Value) {
+ if (Writer->isLittleEndian())
+ StringLE64(buf, Value);
+ else
+ StringBE64(buf, Value);
+ }
+
+ void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+
+ void WriteSymbolEntry(MCDataFragment *F, uint64_t name, uint8_t info,
+ uint64_t value, uint64_t size,
+ uint8_t other, uint16_t shndx);
+
+ void WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD,
+ const MCAsmLayout &Layout);
+
+ void WriteSymbolTable(MCDataFragment *F, const MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+ uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S);
+
+ /// ComputeSymbolTable - Compute the symbol table data: the string table
+ /// and the local, external, and undefined symbol lists.
+ void ComputeSymbolTable(MCAssembler &Asm);
+
+ void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+ const MCSectionData &SD);
+
+ void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ WriteRelocation(Asm, Layout, *it);
+ }
+ }
+
+ void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout);
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm) {
+ // Compute symbol table information.
+ ComputeSymbolTable(Asm);
+ }
+
+ void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags,
+ uint64_t Address, uint64_t Offset,
+ uint64_t Size, uint32_t Link, uint32_t Info,
+ uint64_t Alignment, uint64_t EntrySize);
+
+ void WriteRelocationsFragment(const MCAssembler &Asm, MCDataFragment *F,
+ const MCSectionData *SD);
+
+ void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ };
+
+}
+
+// Emit the ELF header.
+void ELFObjectWriterImpl::WriteHeader(uint64_t SectionDataSize,
+ unsigned NumberOfSections) {
+ // ELF Header
+ // ----------
+ //
+ // Note
+ // ----
+ // The WriteWord method behaves differently for ELF32 and ELF64,
+ // writing 4 bytes in the former and 8 bytes in the latter.
+
+ Write8(0x7f); // e_ident[EI_MAG0]
+ Write8('E'); // e_ident[EI_MAG1]
+ Write8('L'); // e_ident[EI_MAG2]
+ Write8('F'); // e_ident[EI_MAG3]
+
+ Write8(Is64Bit ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS]
+
+ // e_ident[EI_DATA]
+ Write8(Writer->isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB);
+
+ Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION]
+ Write8(ELF::ELFOSABI_LINUX); // e_ident[EI_OSABI]
+ Write8(0); // e_ident[EI_ABIVERSION]
+
+ WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD);
+
+ Write16(ELF::ET_REL); // e_type
+
+ // FIXME: Make this configurable
+ Write16(Is64Bit ? ELF::EM_X86_64 : ELF::EM_386); // e_machine = target
+
+ Write32(ELF::EV_CURRENT); // e_version
+ WriteWord(0); // e_entry, no entry point in .o file
+ WriteWord(0); // e_phoff, no program header for .o
+ WriteWord(SectionDataSize + (Is64Bit ? sizeof(ELF::Elf64_Ehdr) :
+ sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes
+
+ // FIXME: Make this configurable.
+ Write32(0); // e_flags = whatever the target wants
+
+ // e_ehsize = ELF header size
+ Write16(Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr));
+
+ Write16(0); // e_phentsize = prog header entry size
+ Write16(0); // e_phnum = # prog header entries = 0
+
+ // e_shentsize = Section header entry size
+ Write16(Is64Bit ? sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr));
+
+ // e_shnum = # of section header ents
+ Write16(NumberOfSections);
+
+ // e_shstrndx = Section # of '.shstrtab'
+ Write16(ShstrtabIndex);
+}
+
+void ELFObjectWriterImpl::WriteSymbolEntry(MCDataFragment *F, uint64_t name,
+ uint8_t info, uint64_t value,
+ uint64_t size, uint8_t other,
+ uint16_t shndx) {
+ if (Is64Bit) {
+ char buf[8];
+
+ String32(buf, name);
+ F->getContents() += StringRef(buf, 4); // st_name
+
+ String8(buf, info);
+ F->getContents() += StringRef(buf, 1); // st_info
+
+ String8(buf, other);
+ F->getContents() += StringRef(buf, 1); // st_other
+
+ String16(buf, shndx);
+ F->getContents() += StringRef(buf, 2); // st_shndx
+
+ String64(buf, value);
+ F->getContents() += StringRef(buf, 8); // st_value
+
+ String64(buf, size);
+ F->getContents() += StringRef(buf, 8); // st_size
+ } else {
+ char buf[4];
+
+ String32(buf, name);
+ F->getContents() += StringRef(buf, 4); // st_name
+
+ String32(buf, value);
+ F->getContents() += StringRef(buf, 4); // st_value
+
+ String32(buf, size);
+ F->getContents() += StringRef(buf, 4); // st_size
+
+ String8(buf, info);
+ F->getContents() += StringRef(buf, 1); // st_info
+
+ String8(buf, other);
+ F->getContents() += StringRef(buf, 1); // st_other
+
+ String16(buf, shndx);
+ F->getContents() += StringRef(buf, 2); // st_shndx
+ }
+}
+
+void ELFObjectWriterImpl::WriteSymbol(MCDataFragment *F, ELFSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &Data = *MSD.SymbolData;
+ uint8_t Info = (Data.getFlags() & 0xff);
+ uint8_t Other = ((Data.getFlags() & 0xf00) >> ELF_STV_Shift);
+ uint64_t Value = 0;
+ uint64_t Size = 0;
+ const MCExpr *ESize;
+
+ if (Data.isCommon() && Data.isExternal())
+ Value = Data.getCommonAlignment();
+
+ if (!Data.isCommon())
+ if (MCFragment *FF = Data.getFragment())
+ Value = Layout.getSymbolAddress(&Data) -
+ Layout.getSectionAddress(FF->getParent());
+
+ ESize = Data.getSize();
+ if (Data.getSize()) {
+ MCValue Res;
+ if (ESize->getKind() == MCExpr::Binary) {
+ const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(ESize);
+
+ if (BE->EvaluateAsRelocatable(Res, &Layout)) {
+ MCSymbolData &A =
+ Layout.getAssembler().getSymbolData(Res.getSymA()->getSymbol());
+ MCSymbolData &B =
+ Layout.getAssembler().getSymbolData(Res.getSymB()->getSymbol());
+
+ Size = Layout.getSymbolAddress(&A) - Layout.getSymbolAddress(&B);
+ }
+ } else if (ESize->getKind() == MCExpr::Constant) {
+ Size = static_cast<const MCConstantExpr *>(ESize)->getValue();
+ } else {
+ assert(0 && "Unsupported size expression");
+ }
+ }
+
+ // Write out the symbol table entry
+ WriteSymbolEntry(F, MSD.StringIndex, Info, Value,
+ Size, Other, MSD.SectionIndex);
+}
+
+void ELFObjectWriterImpl::WriteSymbolTable(MCDataFragment *F,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // The string table must be emitted first because we need the index
+ // into the string table for all the symbol names.
+ assert(StringTable.size() && "Missing string table");
+
+ // FIXME: Make sure the start of the symbol table is aligned.
+
+ // The first entry is the undefined symbol entry.
+ unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+ F->getContents().append(EntrySize, '\x00');
+
+ // Write the symbol table entries.
+ LastLocalSymbolIndex = LocalSymbolData.size() + 1;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = LocalSymbolData[i];
+ WriteSymbol(F, MSD, Layout);
+ }
+
+ // Write out a symbol table entry for each section, leaving out the
+ // just-added .symtab, which is at the very end.
+ unsigned Index = 1;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+ // Leave out relocation sections so we don't mess up the section
+ // indexes referenced by the relocations.
+ if (Section.getType() == ELF::SHT_RELA || Section.getType() == ELF::SHT_REL)
+ continue;
+ if (Index == Asm.size())
+ continue;
+ WriteSymbolEntry(F, 0, ELF::STT_SECTION, 0, 0, ELF::STV_DEFAULT, Index);
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = ExternalSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ assert((Data.getFlags() & ELF_STB_Global) &&
+ "External symbol requires STB_GLOBAL flag");
+ WriteSymbol(F, MSD, Layout);
+ if (Data.getFlags() & ELF_STB_Local)
+ LastLocalSymbolIndex++;
+ }
+
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
+ ELFSymbolData &MSD = UndefinedSymbolData[i];
+ MCSymbolData &Data = *MSD.SymbolData;
+ Data.setFlags(Data.getFlags() | ELF_STB_Global);
+ WriteSymbol(F, MSD, Layout);
+ if (Data.getFlags() & ELF_STB_Local)
+ LastLocalSymbolIndex++;
+ }
+}
+
+// FIXME: this is currently X86/X86_64 only
+void ELFObjectWriterImpl::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ int64_t Addend = 0;
+ unsigned Index = 0;
+ int64_t Value = Target.getConstant();
+
+ if (!Target.isAbsolute()) {
+ const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
+ MCFragment *F = SD.getFragment();
+
+ if (Base) {
+ if (F && (!Symbol->isInSection() || SD.isCommon()) && !SD.isExternal()) {
+ Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
+ Value += Layout.getSymbolAddress(&SD);
+ } else
+ Index = getSymbolIndexInSymbolTable(Asm, Symbol);
+ if (Base != &SD)
+ Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
+ Addend = Value;
+ // Compensate for the addend on i386.
+ if (Is64Bit)
+ Value = 0;
+ } else {
+ if (F) {
+ // Index of the section in .symtab against which this symbol
+ // is being relocated, + 2 (empty section + abs. symbols).
+ Index = F->getParent()->getOrdinal() + LocalSymbolData.size() + 1;
+
+ MCSectionData *FSD = F->getParent();
+ // Offset of the symbol in the section
+ Addend = Layout.getSymbolAddress(&SD) - Layout.getSectionAddress(FSD);
+ } else {
+ FixedValue = Value;
+ return;
+ }
+ }
+ }
+
+ FixedValue = Value;
+
+ // Determine the type of the relocation.
+ bool IsPCRel = isFixupKindX86PCRel(Fixup.getKind());
+ unsigned Type;
+ if (Is64Bit) {
+ if (IsPCRel) {
+ Type = ELF::R_X86_64_PC32;
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case FK_Data_8: Type = ELF::R_X86_64_64; break;
+ case X86::reloc_pcrel_4byte:
+ case FK_Data_4:
+ // Check that the offset fits in a signed 32-bit value.
+ if (isInt<32>(Target.getConstant()))
+ Type = ELF::R_X86_64_32S;
+ else
+ Type = ELF::R_X86_64_32;
+ break;
+ case FK_Data_2: Type = ELF::R_X86_64_16; break;
+ case X86::reloc_pcrel_1byte:
+ case FK_Data_1: Type = ELF::R_X86_64_8; break;
+ }
+ }
+ } else {
+ if (IsPCRel) {
+ Type = ELF::R_386_PC32;
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case X86::reloc_pcrel_4byte:
+ case FK_Data_4: Type = ELF::R_386_32; break;
+ case FK_Data_2: Type = ELF::R_386_16; break;
+ case X86::reloc_pcrel_1byte:
+ case FK_Data_1: Type = ELF::R_386_8; break;
+ }
+ }
+ }
+
+ ELFRelocationEntry ERE;
+
+ if (Is64Bit) {
+ struct ELF::Elf64_Rela ERE64;
+ ERE64.setSymbolAndType(Index, Type);
+ ERE.r_info = ERE64.r_info;
+ } else {
+ struct ELF::Elf32_Rela ERE32;
+ ERE32.setSymbolAndType(Index, Type);
+ ERE.r_info = ERE32.r_info;
+ }
+
+ ERE.r_offset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+
+ if (HasRelocationAddend)
+ ERE.r_addend = Addend;
+ else
+ ERE.r_addend = 0; // Silence compiler warning.
+
+ Relocations[Fragment->getParent()].push_back(ERE);
+}
+
+uint64_t
+ELFObjectWriterImpl::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+ const MCSymbol *S) {
+ MCSymbolData &SD = Asm.getSymbolData(*S);
+
+ // Local symbol.
+ if (!SD.isExternal() && !S->isUndefined())
+ return SD.getIndex() + /* empty symbol */ 1;
+
+ // External or undefined symbol.
+ return SD.getIndex() + Asm.size() + /* empty symbol */ 1;
+}
+
+void ELFObjectWriterImpl::ComputeSymbolTable(MCAssembler &Asm) {
+ // Build section lookup table.
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index)
+ SectionIndexMap[&it->getSection()] = Index;
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
+
+ // Add the data for local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(Symbol))
+ continue;
+
+ if (it->isExternal() || Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ ELFSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = ELF::SHN_ABS;
+ LocalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ LocalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Now add non-local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(Symbol))
+ continue;
+
+ if (!it->isExternal() && !Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
+ }
+
+ ELFSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
+
+ if (Symbol.isUndefined()) {
+ MSD.SectionIndex = ELF::SHN_UNDEF;
+ // XXX: For some reason we don't Emit* this.
+ it->setFlags(it->getFlags() | ELF_STB_Global);
+ UndefinedSymbolData.push_back(MSD);
+ } else if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = ELF::SHN_ABS;
+ ExternalSymbolData.push_back(MSD);
+ } else if (it->isCommon()) {
+ MSD.SectionIndex = ELF::SHN_COMMON;
+ ExternalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ ExternalSymbolData.push_back(MSD);
+ }
+ }
+
+ // Symbols are required to be in lexicographic order.
+ array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
+ array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices. Local symbols must come before all other
+ // symbols with non-local bindings.
+ Index = 0;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+}
+
+void ELFObjectWriterImpl::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+ const MCSectionData &SD) {
+ if (!Relocations[&SD].empty()) {
+ MCContext &Ctx = Asm.getContext();
+ const MCSection *RelaSection;
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ const StringRef SectionName = Section.getSectionName();
+ std::string RelaSectionName = HasRelocationAddend ? ".rela" : ".rel";
+ RelaSectionName += SectionName;
+
+ unsigned EntrySize;
+ if (HasRelocationAddend)
+ EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela);
+ else
+ EntrySize = Is64Bit ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
+
+ RelaSection = Ctx.getELFSection(RelaSectionName, HasRelocationAddend ?
+ ELF::SHT_RELA : ELF::SHT_REL, 0,
+ SectionKind::getReadOnly(),
+ false, EntrySize);
+
+ MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection);
+ RelaSD.setAlignment(1);
+
+ MCDataFragment *F = new MCDataFragment(&RelaSD);
+
+ WriteRelocationsFragment(Asm, F, &SD);
+
+ Asm.AddSectionToTheEnd(RelaSD, Layout);
+ }
+}
+
+void ELFObjectWriterImpl::WriteSecHdrEntry(uint32_t Name, uint32_t Type,
+ uint64_t Flags, uint64_t Address,
+ uint64_t Offset, uint64_t Size,
+ uint32_t Link, uint32_t Info,
+ uint64_t Alignment,
+ uint64_t EntrySize) {
+ Write32(Name); // sh_name: index into string table
+ Write32(Type); // sh_type
+ WriteWord(Flags); // sh_flags
+ WriteWord(Address); // sh_addr
+ WriteWord(Offset); // sh_offset
+ WriteWord(Size); // sh_size
+ Write32(Link); // sh_link
+ Write32(Info); // sh_info
+ WriteWord(Alignment); // sh_addralign
+ WriteWord(EntrySize); // sh_entsize
+}
+
+void ELFObjectWriterImpl::WriteRelocationsFragment(const MCAssembler &Asm,
+ MCDataFragment *F,
+ const MCSectionData *SD) {
+ std::vector<ELFRelocationEntry> &Relocs = Relocations[SD];
+ // Sort by r_offset just like GNU as does. The comparator sorts in
+ // descending order, so the loop below walks the array in reverse.
+ array_pod_sort(Relocs.begin(), Relocs.end());
+
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ ELFRelocationEntry entry = Relocs[e - i - 1];
+
+ unsigned WordSize = Is64Bit ? 8 : 4;
+ F->getContents() += StringRef((const char *)&entry.r_offset, WordSize);
+ F->getContents() += StringRef((const char *)&entry.r_info, WordSize);
+
+ if (HasRelocationAddend)
+ F->getContents() += StringRef((const char *)&entry.r_addend, WordSize);
+ }
+}
+
+void ELFObjectWriterImpl::CreateMetadataSections(MCAssembler &Asm,
+ MCAsmLayout &Layout) {
+ MCContext &Ctx = Asm.getContext();
+ MCDataFragment *F;
+
+ WriteRelocations(Asm, Layout);
+
+ const MCSection *SymtabSection;
+ unsigned EntrySize = Is64Bit ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32;
+
+ SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+ SectionKind::getReadOnly(),
+ false, EntrySize);
+
+ MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection);
+
+ SymtabSD.setAlignment(Is64Bit ? 8 : 4);
+
+ F = new MCDataFragment(&SymtabSD);
+
+ // Symbol table
+ WriteSymbolTable(F, Asm, Layout);
+ Asm.AddSectionToTheEnd(SymtabSD, Layout);
+
+ const MCSection *StrtabSection;
+ StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly(), false);
+
+ MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection);
+ StrtabSD.setAlignment(1);
+
+ // FIXME: This isn't right. If the sections get rearranged this will
+ // be wrong. We need a proper lookup.
+ StringTableIndex = Asm.size();
+
+ F = new MCDataFragment(&StrtabSD);
+ F->getContents().append(StringTable.begin(), StringTable.end());
+ Asm.AddSectionToTheEnd(StrtabSD, Layout);
+
+ const MCSection *ShstrtabSection;
+ ShstrtabSection = Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0,
+ SectionKind::getReadOnly(), false);
+
+ MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection);
+ ShstrtabSD.setAlignment(1);
+
+ F = new MCDataFragment(&ShstrtabSD);
+
+ // FIXME: This isn't right. If the sections get rearranged this will
+ // be wrong. We need a proper lookup.
+ ShstrtabIndex = Asm.size();
+
+ // Section header string table.
+ //
+ // The first entry of a string table holds a null character so skip
+ // section 0.
+ uint64_t Index = 1;
+ F->getContents() += '\x00';
+
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(it->getSection());
+
+ // Remember the index into the string table so we can write it
+ // into the sh_name field of the section header table.
+ SectionStringTableIndex[&it->getSection()] = Index;
+
+ Index += Section.getSectionName().size() + 1;
+ F->getContents() += Section.getSectionName();
+ F->getContents() += '\x00';
+ }
+
+ Asm.AddSectionToTheEnd(ShstrtabSD, Layout);
+}
+
+void ELFObjectWriterImpl::WriteObject(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ CreateMetadataSections(const_cast<MCAssembler&>(Asm),
+ const_cast<MCAsmLayout&>(Layout));
+
+ // Add 1 for the null section.
+ unsigned NumSections = Asm.size() + 1;
+
+ uint64_t SectionDataSize = 0;
+
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+
+ // Get the size of the section in the output file (including padding).
+ uint64_t Size = Layout.getSectionFileSize(&SD);
+ SectionDataSize += Size;
+ }
+
+ // Write out the ELF header ...
+ WriteHeader(SectionDataSize, NumSections);
+ FileOff = Is64Bit ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr);
+
+ // ... then all of the sections ...
+ DenseMap<const MCSection*, uint64_t> SectionOffsetMap;
+
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+
+ unsigned Index = 1;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ // Remember the offset into the file for this section.
+ SectionOffsetMap[&it->getSection()] = FileOff;
+
+ SectionIndexMap[&it->getSection()] = Index++;
+
+ const MCSectionData &SD = *it;
+ FileOff += Layout.getSectionFileSize(&SD);
+
+ Asm.WriteSectionData(it, Layout, Writer);
+ }
+
+ // ... and then the section header table.
+ // Should we align the section header table?
+ //
+ // Null section first.
+ WriteSecHdrEntry(0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
+
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF&>(SD.getSection());
+
+ uint64_t sh_link = 0;
+ uint64_t sh_info = 0;
+
+ switch(Section.getType()) {
+ case ELF::SHT_DYNAMIC:
+ sh_link = SectionStringTableIndex[&it->getSection()];
+ sh_info = 0;
+ break;
+
+ case ELF::SHT_REL:
+ case ELF::SHT_RELA: {
+ const MCSection *SymtabSection;
+ const MCSection *InfoSection;
+
+ SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB, 0,
+ SectionKind::getReadOnly(),
+ false);
+ sh_link = SectionIndexMap[SymtabSection];
+
+ // Remove ".rel" and ".rela" prefixes.
+ unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
+ StringRef SectionName = Section.getSectionName().substr(SecNameLen);
+
+ InfoSection = Asm.getContext().getELFSection(SectionName,
+ ELF::SHT_PROGBITS, 0,
+ SectionKind::getReadOnly(),
+ false);
+ sh_info = SectionIndexMap[InfoSection];
+ break;
+ }
+
+ case ELF::SHT_SYMTAB:
+ case ELF::SHT_DYNSYM:
+ sh_link = StringTableIndex;
+ sh_info = LastLocalSymbolIndex;
+ break;
+
+ case ELF::SHT_PROGBITS:
+ case ELF::SHT_STRTAB:
+ case ELF::SHT_NOBITS:
+ case ELF::SHT_NULL:
+ // Nothing to do.
+ break;
+
+ case ELF::SHT_HASH:
+ case ELF::SHT_GROUP:
+ case ELF::SHT_SYMTAB_SHNDX:
+ default:
+ assert(0 && "FIXME: sh_type value not supported!");
+ break;
+ }
+
+ WriteSecHdrEntry(SectionStringTableIndex[&it->getSection()],
+ Section.getType(), Section.getFlags(),
+ Layout.getSectionAddress(&SD),
+ SectionOffsetMap.lookup(&SD.getSection()),
+ Layout.getSectionSize(&SD), sh_link,
+ sh_info, SD.getAlignment(),
+ Section.getEntrySize());
+ }
+}
+
+ELFObjectWriter::ELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ bool IsLittleEndian,
+ bool HasRelocationAddend)
+ : MCObjectWriter(OS, IsLittleEndian)
+{
+ Impl = new ELFObjectWriterImpl(this, Is64Bit, HasRelocationAddend);
+}
+
+ELFObjectWriter::~ELFObjectWriter() {
+ delete (ELFObjectWriterImpl*) Impl;
+}
+
+void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
+ ((ELFObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm);
+}
+
+void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ ((ELFObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+}
+
+void ELFObjectWriter::WriteObject(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ ((ELFObjectWriterImpl*) Impl)->WriteObject(Asm, Layout);
+}
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index a275be2c53c5f..670b2e9b292a5 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -68,7 +68,9 @@ MCAsmInfo::MCAsmInfo() {
ExceptionsType = ExceptionHandling::None;
DwarfRequiresFrameSection = true;
DwarfUsesInlineInfoSection = false;
+ DwarfUsesAbsoluteLabelForStmtList = true;
DwarfSectionOffsetDirective = 0;
+ DwarfUsesLabelOffsetForRanges = true;
HasMicrosoftFastStdCallMangling = false;
AsmTransCBE = 0;
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 0bd3b2d001e8c..e0e261a63c707 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -44,5 +44,8 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
+
+ DwarfUsesAbsoluteLabelForStmtList = false;
+ DwarfUsesLabelOffsetForRanges = false;
}
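
These two flags let a DWARF emitter pick a per-platform relocation strategy: an absolute label for DW_AT_stmt_list versus a section-relative offset, and likewise for range lists. A hedged sketch of how a consumer might branch on such a flag, using a stand-in struct rather than the real MCAsmInfo accessor (whose name is not shown in this patch):

// Hypothetical flag holder standing in for MCAsmInfo.
struct AsmFlags {
  bool DwarfUsesAbsoluteLabelForStmtList;
};

void emitStmtList(const AsmFlags &MAI) {
  if (MAI.DwarfUsesAbsoluteLabelForStmtList) {
    // ELF-style: emit the section label directly; the linker applies an
    // absolute relocation.
  } else {
    // Darwin-style: emit Label - SectionStart, a plain offset that needs
    // no absolute relocation at all.
  }
}
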
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e272b60c44759..1cc8fb0b54867 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -31,7 +31,7 @@ class MCAsmStreamer : public MCStreamer {
formatted_raw_ostream &OS;
const MCAsmInfo &MAI;
OwningPtr<MCInstPrinter> InstPrinter;
- MCCodeEmitter *Emitter;
+ OwningPtr<MCCodeEmitter> Emitter;
SmallString<128> CommentToEmit;
raw_svector_ostream CommentStream;
@@ -217,6 +217,7 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
void MCAsmStreamer::SwitchSection(const MCSection *Section) {
assert(Section && "Cannot switch to a null section!");
if (Section != CurSection) {
+ PrevSection = CurSection;
CurSection = Section;
Section->PrintSwitchToSection(MAI, OS);
}
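
Recording the previous section on every switch is what makes a ".previous"-style toggle possible later. Reduced to a sketch with a stand-in class (not the streamer API):

class MCSection;

// Stand-in for the streamer's section bookkeeping.
class SectionTracker {
  const MCSection *Cur;
  const MCSection *Prev;
public:
  SectionTracker() : Cur(0), Prev(0) {}
  void switchTo(const MCSection *S) {
    if (S != Cur) {
      Prev = Cur;            // remember where we came from
      Cur = S;
    }
  }
  void switchToPrevious() {  // ".previous"-style toggle
    if (Prev)
      switchTo(Prev);
  }
};
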
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 7d8455492780c..f0e1d7fbc21c5 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -178,8 +178,12 @@ uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const {
MCFragment::MCFragment() : Kind(FragmentType(~0)) {
}
+MCFragment::~MCFragment() {
+}
+
MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
- : Kind(_Kind), Parent(_Parent), Atom(0), EffectiveSize(~UINT64_C(0))
+ : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)),
+ EffectiveSize(~UINT64_C(0))
{
if (Parent)
Parent->getFragmentList().push_back(this);
@@ -207,7 +211,8 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
uint64_t _Offset, MCAssembler *A)
: Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
IsExternal(false), IsPrivateExtern(false),
- CommonSize(0), CommonAlign(0), Flags(0), Index(0)
+ CommonSize(0), SymbolSize(0), CommonAlign(0),
+ Flags(0), Index(0)
{
if (A)
A->getSymbolList().push_back(this);
@@ -623,8 +628,23 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
switch (it->getKind()) {
default:
assert(0 && "Invalid fragment in virtual section!");
+ case MCFragment::FT_Data: {
+ // Check that we aren't trying to write a non-zero contents (or fixups)
+ // into a virtual section. This is to support clients which use standard
+ // directives to fill the contents of virtual sections.
+ MCDataFragment &DF = cast<MCDataFragment>(*it);
+ assert(DF.fixup_begin() == DF.fixup_end() &&
+ "Cannot have fixups in virtual section!");
+ for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
+ assert(DF.getContents()[i] == 0 &&
+ "Invalid data value for virtual section!");
+ break;
+ }
case MCFragment::FT_Align:
- assert(!cast<MCAlignFragment>(it)->getValueSize() &&
+ // Check that we aren't trying to write a non-zero value into a virtual
+ // section.
+ assert((!cast<MCAlignFragment>(it)->getValueSize() ||
+ !cast<MCAlignFragment>(it)->getValue()) &&
"Invalid align in virtual section!");
break;
case MCFragment::FT_Fill:
@@ -647,7 +667,41 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD));
}
-void MCAssembler::Finish() {
+void MCAssembler::AddSectionToTheEnd(MCSectionData &SD, MCAsmLayout &Layout) {
+ // Assign the new section the next available ordinal.
+ SD.setOrdinal(size());
+
+ // Assign layout order indices to sections and fragments.
+ unsigned FragmentIndex = 0;
+ unsigned i = 0;
+ for (unsigned e = Layout.getSectionOrder().size(); i != e; ++i) {
+ MCSectionData *OrderSD = Layout.getSectionOrder()[i];
+
+ for (MCSectionData::iterator it2 = OrderSD->begin(),
+ ie2 = OrderSD->end(); it2 != ie2; ++it2)
+ FragmentIndex++;
+ }
+
+ SD.setLayoutOrder(i);
+ for (MCSectionData::iterator it2 = SD.begin(),
+ ie2 = SD.end(); it2 != ie2; ++it2) {
+ it2->setLayoutOrder(FragmentIndex++);
+ }
+ Layout.getSectionOrder().push_back(&SD);
+
+ Layout.LayoutSection(&SD);
+
+ // Layout until everything fits.
+ while (LayoutOnce(Layout))
+ continue;
+}
+
+void MCAssembler::Finish(MCObjectWriter *Writer) {
DEBUG_WITH_TYPE("mc-dump", {
llvm::errs() << "assembler backend - pre-layout\n--\n";
dump(); });
@@ -717,9 +771,15 @@ void MCAssembler::Finish() {
dump(); });
uint64_t StartOffset = OS.tell();
- llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS));
- if (!Writer)
- report_fatal_error("unable to create object writer!");
+
+ llvm::OwningPtr<MCObjectWriter> OwnWriter(0);
+ if (Writer == 0) {
+ // No custom Writer: create the default one, lifetime-managed by OwningPtr.
+ OwnWriter.reset(getBackend().createObjectWriter(OS));
+ Writer = OwnWriter.get();
+ if (!Writer)
+ report_fatal_error("unable to create object writer!");
+ }
// Allow the object writer a chance to perform post-layout binding (for
// example, to set the index fields in the symbol data).
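
The Finish() change above is a common optional-dependency pattern: use a caller-supplied object if given, otherwise create a default whose lifetime the callee owns. A generic sketch with hypothetical names:

#include "llvm/ADT/OwningPtr.h"

struct Writer { /* ... */ };
Writer *createDefaultWriter();  // hypothetical factory

void finish(Writer *W) {
  llvm::OwningPtr<Writer> Own;  // stays empty for caller-owned writers
  if (W == 0) {
    Own.reset(createDefaultWriter());
    W = Own.get();
  }
  // ... use W; Own deletes the default on return, while a caller-supplied
  // writer is left untouched.
}
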
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 11370642530a1..e5586a0d7c311 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCLabel.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -23,7 +24,8 @@ typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
-MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
+MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0),
+ CurrentDwarfLoc(0,0,0,0,0) {
MachOUniquingMap = 0;
ELFUniquingMap = 0;
COFFUniquingMap = 0;
@@ -31,6 +33,8 @@ MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
SecureLogFile = getenv("AS_SECURE_LOG_FILE");
SecureLog = 0;
SecureLogUsed = false;
+
+ DwarfLocSeen = false;
}
MCContext::~MCContext() {
@@ -147,7 +151,7 @@ getMachOSection(StringRef Segment, StringRef Section,
const MCSection *MCContext::
getELFSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind, bool IsExplicit) {
+ SectionKind Kind, bool IsExplicit, unsigned EntrySize) {
if (ELFUniquingMap == 0)
ELFUniquingMap = new ELFUniqueMapTy();
ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
@@ -157,7 +161,7 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
if (Entry.getValue()) return Entry.getValue();
MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags,
- Kind, IsExplicit);
+ Kind, IsExplicit, EntrySize);
Entry.setValue(Result);
return Result;
}
@@ -181,3 +185,81 @@ const MCSection *MCContext::getCOFFSection(StringRef Section,
Entry.setValue(Result);
return Result;
}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Management
+//===----------------------------------------------------------------------===//
+
+/// GetDwarfFile - takes a file name and number to place in the DWARF file and
+/// directory tables. If the file number has already been allocated, it is an
+/// error: zero is returned and the client reports the error. Otherwise the
+/// allocated file number is returned. The file numbers may be in any order.
+unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) {
+ // TODO: a FileNumber of zero says to use the next available file number.
+ // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked
+ // to not be less than one. This needs to be changed to be not less than zero.
+
+ // Make space for this FileNumber in the MCDwarfFiles vector if needed.
+ if (FileNumber >= MCDwarfFiles.size()) {
+ MCDwarfFiles.resize(FileNumber + 1);
+ } else {
+ MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber];
+ if (ExistingFile)
+ // It is an error to see the same number more than once.
+ return 0;
+ }
+
+ // Get the new MCDwarfFile slot for this FileNumber.
+ MCDwarfFile *&File = MCDwarfFiles[FileNumber];
+
+ // Separate the directory part from the basename of the FileName.
+ std::pair<StringRef, StringRef> Slash = FileName.rsplit('/');
+
+ // Find or make an entry in the MCDwarfDirs vector for this Directory.
+ StringRef Name;
+ unsigned DirIndex;
+ // Capture directory name.
+ if (Slash.second.empty()) {
+ Name = Slash.first;
+ DirIndex = 0; // For FileNames with no directories a DirIndex of 0 is used.
+ } else {
+ StringRef Directory = Slash.first;
+ Name = Slash.second;
+ for (DirIndex = 0; DirIndex < MCDwarfDirs.size(); DirIndex++) {
+ if (Directory == MCDwarfDirs[DirIndex])
+ break;
+ }
+ if (DirIndex >= MCDwarfDirs.size()) {
+ char *Buf = static_cast<char *>(Allocate(Directory.size()));
+ memcpy(Buf, Directory.data(), Directory.size());
+ MCDwarfDirs.push_back(StringRef(Buf, Directory.size()));
+ }
+ // The DirIndex is one-based, as a DirIndex of 0 is used for FileNames with
+ // no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that the
+ // directory names are stored at MCDwarfDirs[DirIndex-1], whereas FileNames
+ // are stored at MCDwarfFiles[FileNumber].Name.
+ DirIndex++;
+ }
+
+ // Now make the MCDwarfFile entry and place it in the slot in the MCDwarfFiles
+ // vector.
+ char *Buf = static_cast<char *>(Allocate(Name.size()));
+ memcpy(Buf, Name.data(), Name.size());
+ File = new (*this) MCDwarfFile(StringRef(Buf, Name.size()), DirIndex);
+
+ // Return the allocated FileNumber.
+ return FileNumber;
+}
+
+/// ValidateDwarfFileNumber - takes a dwarf file number and returns true if it
+/// is currently assigned, false otherwise.
+bool MCContext::ValidateDwarfFileNumber(unsigned FileNumber) {
+ if (FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
+ return false;
+
+ return MCDwarfFiles[FileNumber] != 0;
+}
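
Concretely, the tables GetDwarfFile builds behave like this (values traced from the code above):

// .file 1 "/tmp/foo.c"  -> GetDwarfFile("/tmp/foo.c", 1) returns 1:
//      MCDwarfDirs      = { "/tmp" }            (DirIndex 1 is one-based)
//      MCDwarfFiles[1]  = { Name: "foo.c", DirIndex: 1 }
// .file 2 "bar.c"       -> GetDwarfFile("bar.c", 2) returns 2:
//      MCDwarfFiles[2]  = { Name: "bar.c", DirIndex: 0 }  (no directory)
// .file 1 "/tmp/baz.c"  -> returns 0: file number 1 is already assigned.
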
diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt
new file mode 100644
index 0000000000000..5fa7b70194b24
--- /dev/null
+++ b/lib/MC/MCDisassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+add_llvm_library(LLVMMCDisassembler
+ EDDisassembler.cpp
+ EDOperand.cpp
+ EDInst.cpp
+ EDToken.cpp
+ )
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
new file mode 100644
index 0000000000000..697b3d9c05153
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -0,0 +1,402 @@
+//===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's disassembler class.
+// The disassembler is responsible for vending individual instructions according
+// to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetAsmLexer.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSelect.h"
+using namespace llvm;
+
+bool EDDisassembler::sInitialized = false;
+EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
+
+struct TripleMap {
+ Triple::ArchType Arch;
+ const char *String;
+};
+
+static struct TripleMap triplemap[] = {
+ { Triple::x86, "i386-unknown-unknown" },
+ { Triple::x86_64, "x86_64-unknown-unknown" },
+ { Triple::arm, "arm-unknown-unknown" },
+ { Triple::thumb, "thumb-unknown-unknown" },
+ { Triple::InvalidArch, NULL, }
+};
+
+/// tripleFromArch - Returns the triple string corresponding to a given
+/// architecture, or NULL if there is an error
+///
+/// @arg arch - The Triple::ArchType for the desired architecture
+static const char *tripleFromArch(Triple::ArchType arch) {
+ unsigned int infoIndex;
+
+ for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
+ if (arch == triplemap[infoIndex].Arch)
+ return triplemap[infoIndex].String;
+ }
+
+ return NULL;
+}
+
+/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
+/// for the desired assembly syntax, suitable for passing to
+/// Target::createMCInstPrinter()
+///
+/// @arg arch - The target architecture
+/// @arg syntax - The assembly syntax in sd form
+static int getLLVMSyntaxVariant(Triple::ArchType arch,
+ EDDisassembler::AssemblySyntax syntax) {
+ switch (syntax) {
+ default:
+ return -1;
+ // Mappings below from X86AsmPrinter.cpp
+ case EDDisassembler::kEDAssemblySyntaxX86ATT:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 0;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxX86Intel:
+ if (arch == Triple::x86 || arch == Triple::x86_64)
+ return 1;
+ else
+ return -1;
+ case EDDisassembler::kEDAssemblySyntaxARMUAL:
+ if (arch == Triple::arm || arch == Triple::thumb)
+ return 0;
+ else
+ return -1;
+ }
+}
+
+void EDDisassembler::initialize() {
+ if (sInitialized)
+ return;
+
+ sInitialized = true;
+
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+ InitializeAllDisassemblers();
+}
+
+#undef BRINGUP_TARGET
+
+EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
+ AssemblySyntax syntax) {
+ CPUKey key;
+ key.Arch = arch;
+ key.Syntax = syntax;
+
+ EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
+
+ if (i != sDisassemblers.end()) {
+ return i->second;
+ } else {
+ EDDisassembler* sdd = new EDDisassembler(key);
+ if (!sdd->valid()) {
+ delete sdd;
+ return NULL;
+ }
+
+ sDisassemblers[key] = sdd;
+
+ return sdd;
+ }
+}
+
+EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
+ AssemblySyntax syntax) {
+ return getDisassembler(Triple(str).getArch(), syntax);
+}
+
+EDDisassembler::EDDisassembler(CPUKey &key) :
+ Valid(false),
+ HasSemantics(false),
+ ErrorStream(nulls()),
+ Key(key) {
+ const char *triple = tripleFromArch(key.Arch);
+
+ if (!triple)
+ return;
+
+ LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax);
+
+ if (LLVMSyntaxVariant < 0)
+ return;
+
+ std::string tripleString(triple);
+ std::string errorString;
+
+ Tgt = TargetRegistry::lookupTarget(tripleString,
+ errorString);
+
+ if (!Tgt)
+ return;
+
+ std::string featureString;
+
+ TargetMachine.reset(Tgt->createTargetMachine(tripleString,
+ featureString));
+
+ const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
+
+ if (!registerInfo)
+ return;
+
+ initMaps(*registerInfo);
+
+ AsmInfo.reset(Tgt->createAsmInfo(tripleString));
+
+ if (!AsmInfo)
+ return;
+
+ Disassembler.reset(Tgt->createMCDisassembler());
+
+ if (!Disassembler)
+ return;
+
+ InstInfos = Disassembler->getEDInfo();
+
+ InstString.reset(new std::string);
+ InstStream.reset(new raw_string_ostream(*InstString));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
+
+ if (!InstPrinter)
+ return;
+
+ GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
+ SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo));
+ SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
+
+ Valid = true;
+}
+
+EDDisassembler::~EDDisassembler() {
+ if (!valid())
+ return;
+}
+
+namespace {
+ /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
+ /// as provided by the sd interface. See MemoryObject.
+ class EDMemoryObject : public llvm::MemoryObject {
+ private:
+ EDByteReaderCallback Callback;
+ void *Arg;
+ public:
+ EDMemoryObject(EDByteReaderCallback callback,
+ void *arg) : Callback(callback), Arg(arg) { }
+ ~EDMemoryObject() { }
+ uint64_t getBase() const { return 0x0; }
+ uint64_t getExtent() const { return (uint64_t)-1; }
+ int readByte(uint64_t address, uint8_t *ptr) const {
+ if (!Callback)
+ return -1;
+
+ if (Callback(ptr, address, Arg))
+ return -1;
+
+ return 0;
+ }
+ };
+}
+
+EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg) {
+ EDMemoryObject memoryObject(byteReader, arg);
+
+ MCInst* inst = new MCInst;
+ uint64_t byteSize;
+
+ if (!Disassembler->getInstruction(*inst,
+ byteSize,
+ memoryObject,
+ address,
+ ErrorStream)) {
+ delete inst;
+ return NULL;
+ } else {
+ const llvm::EDInstInfo *thisInstInfo;
+
+ thisInstInfo = &InstInfos[inst->getOpcode()];
+
+ EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
+ return sdInst;
+ }
+}
+
+void EDDisassembler::initMaps(const TargetRegisterInfo &registerInfo) {
+ unsigned numRegisters = registerInfo.getNumRegs();
+ unsigned registerIndex;
+
+ for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
+ const char* registerName = registerInfo.get(registerIndex).Name;
+
+ RegVec.push_back(registerName);
+ RegRMap[registerName] = registerIndex;
+ }
+
+ switch (Key.Arch) {
+ default:
+ break;
+ case Triple::x86:
+ case Triple::x86_64:
+ stackPointers.insert(registerIDWithName("SP"));
+ stackPointers.insert(registerIDWithName("ESP"));
+ stackPointers.insert(registerIDWithName("RSP"));
+
+ programCounters.insert(registerIDWithName("IP"));
+ programCounters.insert(registerIDWithName("EIP"));
+ programCounters.insert(registerIDWithName("RIP"));
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ stackPointers.insert(registerIDWithName("SP"));
+
+ programCounters.insert(registerIDWithName("PC"));
+ break;
+ }
+}
+
+const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
+ if (registerID >= RegVec.size())
+ return NULL;
+ else
+ return RegVec[registerID].c_str();
+}
+
+unsigned EDDisassembler::registerIDWithName(const char *name) const {
+ regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
+ if (iter == RegRMap.end())
+ return 0;
+ else
+ return (*iter).second;
+}
+
+bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
+ return (stackPointers.find(registerID) != stackPointers.end());
+}
+
+bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
+ return (programCounters.find(registerID) != programCounters.end());
+}
+
+int EDDisassembler::printInst(std::string &str, MCInst &inst) {
+ PrinterMutex.acquire();
+
+ InstPrinter->printInst(&inst, *InstStream);
+ InstStream->flush();
+ str = *InstString;
+ InstString->clear();
+
+ PrinterMutex.release();
+
+ return 0;
+}
+
+int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
+ SmallVectorImpl<AsmToken> &tokens,
+ const std::string &str) {
+ int ret = 0;
+
+ switch (Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ case Triple::arm:
+ case Triple::thumb:
+ break;
+ }
+
+ const char *cStr = str.c_str();
+ MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
+
+ StringRef instName;
+ SMLoc instLoc;
+
+ SourceMgr sourceMgr;
+ sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
+ MCContext context(*AsmInfo);
+ OwningPtr<MCStreamer> streamer(createNullStreamer(context));
+ OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
+ context, *streamer,
+ *AsmInfo));
+ OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser,
+ *TargetMachine));
+
+ AsmToken OpcodeToken = genericParser->Lex();
+ AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
+
+ if (OpcodeToken.is(AsmToken::Identifier)) {
+ instName = OpcodeToken.getString();
+ instLoc = OpcodeToken.getLoc();
+
+ if (NextToken.isNot(AsmToken::Eof) &&
+ TargetParser->ParseInstruction(instName, instLoc, operands))
+ ret = -1;
+ } else {
+ ret = -1;
+ }
+
+ ParserMutex.acquire();
+
+ if (!ret) {
+ GenericAsmLexer->setBuffer(buf);
+
+ while (SpecificAsmLexer->Lex(),
+ SpecificAsmLexer->isNot(AsmToken::Eof) &&
+ SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
+ if (SpecificAsmLexer->is(AsmToken::Error)) {
+ ret = -1;
+ break;
+ }
+ tokens.push_back(SpecificAsmLexer->getTok());
+ }
+ }
+
+ ParserMutex.release();
+
+ return ret;
+}
+
+int EDDisassembler::llvmSyntaxVariant() const {
+ return LLVMSyntaxVariant;
+}
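
End to end, the pieces above compose as follows. A hedged usage sketch, assuming the EDDisassembler.h declarations are visible; the flat in-memory buffer and its callback are illustrative only:

#include <stdint.h>

// Reads bytes for the disassembler out of a flat buffer passed via `arg`.
static int bufferReader(uint8_t *byte, uint64_t address, void *arg) {
  *byte = static_cast<uint8_t *>(arg)[address];  // sketch: no bounds check
  return 0;
}

void disassembleFirstInst(uint8_t *code) {
  using namespace llvm;
  EDDisassembler::initialize();
  EDDisassembler *dis =
      EDDisassembler::getDisassembler(Triple::x86_64,
                                      EDDisassembler::kEDAssemblySyntaxX86ATT);
  if (!dis)
    return;
  EDInst *inst = dis->createInst(bufferReader, 0, code);
  if (!inst)
    return;
  const char *str;
  if (!inst->getString(str)) {
    // str now holds the printed instruction; it stays owned by `inst`.
  }
  delete inst;  // also deletes the wrapped MCInst
}
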
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
new file mode 100644
index 0000000000000..e2f850bcdba97
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -0,0 +1,271 @@
+//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// disassembler class. The disassembler is responsible for vending individual
+// instructions according to a given architecture and disassembly syntax.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDDISASSEMBLER_H
+#define LLVM_EDDISASSEMBLER_H
+
+#include "EDInfo.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+class AsmLexer;
+class AsmToken;
+class MCContext;
+class MCAsmInfo;
+class MCAsmLexer;
+class AsmParser;
+class TargetAsmLexer;
+class TargetAsmParser;
+class MCDisassembler;
+class MCInstPrinter;
+class MCInst;
+class MCParsedAsmOperand;
+class MCStreamer;
+template <typename T> class SmallVectorImpl;
+class SourceMgr;
+class Target;
+class TargetMachine;
+class TargetRegisterInfo;
+
+struct EDInstInfo;
+struct EDInst;
+struct EDOperand;
+struct EDToken;
+
+typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
+
+/// EDDisassembler - Encapsulates a disassembler for a single architecture and
+/// disassembly syntax. Also manages the static disassembler registry.
+struct EDDisassembler {
+ typedef enum {
+ /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86Intel = 0,
+ /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
+ kEDAssemblySyntaxX86ATT = 1,
+ kEDAssemblySyntaxARMUAL = 2
+ } AssemblySyntax;
+
+
+ ////////////////////
+ // Static members //
+ ////////////////////
+
+ /// CPUKey - Encapsulates the descriptor of an architecture/disassembly-syntax
+ /// pair
+ struct CPUKey {
+ /// The architecture type
+ llvm::Triple::ArchType Arch;
+
+ /// The assembly syntax
+ AssemblySyntax Syntax;
+
+ /// operator== - Equality operator
+ bool operator==(const CPUKey &key) const {
+ return (Arch == key.Arch &&
+ Syntax == key.Syntax);
+ }
+
+ /// operator< - Less-than operator
+ bool operator<(const CPUKey &key) const {
+ if (Arch < key.Arch)
+ return true;
+ if (Arch > key.Arch)
+ return false;
+ return Syntax < key.Syntax;
+ }
+ };
+
+ typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
+
+ /// True if the disassembler registry has been initialized; false if not
+ static bool sInitialized;
+ /// A map from disassembler specifications to disassemblers. Populated
+ /// lazily.
+ static DisassemblerMap_t sDisassemblers;
+
+ /// getDisassembler - Returns the specified disassembler, or NULL on failure
+ ///
+ /// @arg arch - The desired architecture
+ /// @arg syntax - The desired disassembly syntax
+ static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch,
+ AssemblySyntax syntax);
+
+ /// getDisassembler - Returns the disassembler for a given combination of
+ /// CPU type, CPU subtype, and assembly syntax, or NULL on failure
+ ///
+ /// @arg str - The string representation of the architecture triple, e.g.,
+ /// "x86_64-apple-darwin"
+ /// @arg syntax - The disassembly syntax for the required disassembler
+ static EDDisassembler *getDisassembler(llvm::StringRef str,
+ AssemblySyntax syntax);
+
+ /// initialize - Initializes the disassembler registry and the LLVM backend
+ static void initialize();
+
+ ////////////////////////
+ // Per-object members //
+ ////////////////////////
+
+ /// True only if the object has been successfully initialized
+ bool Valid;
+ /// True if the disassembler can provide semantic information
+ bool HasSemantics;
+
+ /// The stream to write errors to
+ llvm::raw_ostream &ErrorStream;
+
+ /// The architecture/syntax pair for the current architecture
+ CPUKey Key;
+ /// The LLVM target corresponding to the disassembler
+ const llvm::Target *Tgt;
+ /// The target machine instance.
+ llvm::OwningPtr<llvm::TargetMachine> TargetMachine;
+ /// The assembly information for the target architecture
+ llvm::OwningPtr<const llvm::MCAsmInfo> AsmInfo;
+ /// The disassembler for the target architecture
+ llvm::OwningPtr<const llvm::MCDisassembler> Disassembler;
+ /// The output string for the instruction printer; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<std::string> InstString;
+ /// The output stream for the disassembler; must be guarded with
+ /// PrinterMutex
+ llvm::OwningPtr<llvm::raw_string_ostream> InstStream;
+ /// The instruction printer for the target architecture; must be guarded with
+ /// PrinterMutex when printing
+ llvm::OwningPtr<llvm::MCInstPrinter> InstPrinter;
+ /// The mutex that guards the instruction printer's printing functions, which
+ /// use a shared stream
+ llvm::sys::Mutex PrinterMutex;
+ /// The array of instruction information provided by the TableGen backend for
+ /// the target architecture
+ const llvm::EDInstInfo *InstInfos;
+ /// The target-specific lexer for use in tokenizing strings, in
+ /// target-independent and target-specific portions
+ llvm::OwningPtr<llvm::AsmLexer> GenericAsmLexer;
+ llvm::OwningPtr<llvm::TargetAsmLexer> SpecificAsmLexer;
+ /// The guard for the above
+ llvm::sys::Mutex ParserMutex;
+ /// The LLVM number used for the target disassembly syntax variant
+ int LLVMSyntaxVariant;
+
+ typedef std::vector<std::string> regvec_t;
+ typedef std::map<std::string, unsigned> regrmap_t;
+
+ /// A vector of registers for quick mapping from LLVM register IDs to names
+ regvec_t RegVec;
+ /// A map of registers for quick mapping from register names to LLVM IDs
+ regrmap_t RegRMap;
+
+ /// A set of register IDs for aliases of the stack pointer for the current
+ /// architecture
+ std::set<unsigned> stackPointers;
+ /// A set of register IDs for aliases of the program counter for the current
+ /// architecture
+ std::set<unsigned> programCounters;
+
+ /// Constructor - initializes a disassembler with all the necessary objects,
+ /// which come pre-allocated from the registry accessor function
+ ///
+ /// @arg key - the architecture and disassembly syntax for the
+ /// disassembler
+ EDDisassembler(CPUKey& key);
+
+ /// valid - returns true if the constructor completed without failure.
+ bool valid() {
+ return Valid;
+ }
+
+ /// hasSemantics - reports whether the disassembler can provide operands and
+ /// tokens.
+ bool hasSemantics() {
+ return HasSemantics;
+ }
+
+ ~EDDisassembler();
+
+ /// createInst - creates and returns an instruction given a callback and
+ /// memory address, or NULL on failure
+ ///
+ /// @arg byteReader - A callback function that provides machine code bytes
+ /// @arg address - The address of the first byte of the instruction,
+ /// suitable for passing to byteReader
+ /// @arg arg - An opaque argument for byteReader
+ EDInst *createInst(EDByteReaderCallback byteReader,
+ uint64_t address,
+ void *arg);
+
+ /// initMaps - initializes regVec and regRMap using the provided register
+ /// info
+ ///
+ /// @arg registerInfo - the register information to use as a source
+ void initMaps(const llvm::TargetRegisterInfo &registerInfo);
+ /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a
+ /// register for a given register ID, or NULL on failure
+ ///
+ /// @arg registerID - the ID of the register to be queried
+ const char *nameWithRegisterID(unsigned registerID) const;
+ /// registerIDWithName - Returns the ID of a register for a given register
+ /// name, or (unsigned)-1 on failure
+ ///
+ /// @arg name - The name of the register
+ unsigned registerIDWithName(const char *name) const;
+
+ /// registerIsStackPointer - reports whether a register ID is an alias for the
+ /// stack pointer register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsStackPointer(unsigned registerID);
+ /// registerIsProgramCounter - reports whether a register ID is an alias for
+ /// the program counter register
+ ///
+ /// @arg registerID - The LLVM register ID
+ bool registerIsProgramCounter(unsigned registerID);
+
+ /// printInst - prints an MCInst to a string, returning 0 on success, or -1
+ /// otherwise
+ ///
+ /// @arg str - A reference to a string which is filled in with the string
+ /// representation of the instruction
+ /// @arg inst - A reference to the MCInst to be printed
+ int printInst(std::string& str,
+ llvm::MCInst& inst);
+
+ /// parseInst - extracts operands and tokens from a string for use in
+ /// tokenizing the string. Returns 0 on success, or -1 otherwise.
+ ///
+ /// @arg operands - A reference to a vector that will be filled in with the
+ /// parsed operands
+ /// @arg tokens - A reference to a vector that will be filled in with the
+ /// tokens
+ /// @arg str - The string representation of the instruction
+ int parseInst(llvm::SmallVectorImpl<llvm::MCParsedAsmOperand*> &operands,
+ llvm::SmallVectorImpl<llvm::AsmToken> &tokens,
+ const std::string &str);
+
+ /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler
+ int llvmSyntaxVariant() const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/MC/MCDisassembler/EDInfo.h b/lib/MC/MCDisassembler/EDInfo.h
new file mode 100644
index 0000000000000..627c06641dbc1
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDInfo.h
@@ -0,0 +1,73 @@
+//===-- EDInfo.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDINFO_H
+#define LLVM_EDINFO_H
+
+enum {
+ EDIS_MAX_OPERANDS = 13,
+ EDIS_MAX_SYNTAXES = 2
+};
+
+enum OperandTypes {
+ kOperandTypeNone,
+ kOperandTypeImmediate,
+ kOperandTypeRegister,
+ kOperandTypeX86Memory,
+ kOperandTypeX86EffectiveAddress,
+ kOperandTypeX86PCRelative,
+ kOperandTypeARMBranchTarget,
+ kOperandTypeARMSoReg,
+ kOperandTypeARMSoImm,
+ kOperandTypeARMSoImm2Part,
+ kOperandTypeARMPredicate,
+ kOperandTypeARMAddrMode2,
+ kOperandTypeARMAddrMode2Offset,
+ kOperandTypeARMAddrMode3,
+ kOperandTypeARMAddrMode3Offset,
+ kOperandTypeARMAddrMode4,
+ kOperandTypeARMAddrMode5,
+ kOperandTypeARMAddrMode6,
+ kOperandTypeARMAddrMode6Offset,
+ kOperandTypeARMAddrModePC,
+ kOperandTypeARMRegisterList,
+ kOperandTypeARMTBAddrMode,
+ kOperandTypeThumbITMask,
+ kOperandTypeThumbAddrModeS1,
+ kOperandTypeThumbAddrModeS2,
+ kOperandTypeThumbAddrModeS4,
+ kOperandTypeThumbAddrModeRR,
+ kOperandTypeThumbAddrModeSP,
+ kOperandTypeThumb2SoReg,
+ kOperandTypeThumb2SoImm,
+ kOperandTypeThumb2AddrModeImm8,
+ kOperandTypeThumb2AddrModeImm8Offset,
+ kOperandTypeThumb2AddrModeImm12,
+ kOperandTypeThumb2AddrModeSoReg,
+ kOperandTypeThumb2AddrModeImm8s4,
+ kOperandTypeThumb2AddrModeImm8s4Offset
+};
+
+enum OperandFlags {
+ kOperandFlagSource = 0x1,
+ kOperandFlagTarget = 0x2
+};
+
+enum InstructionTypes {
+ kInstructionTypeNone,
+ kInstructionTypeMove,
+ kInstructionTypeBranch,
+ kInstructionTypePush,
+ kInstructionTypePop,
+ kInstructionTypeCall,
+ kInstructionTypeReturn
+};
+
+
+#endif
diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp
new file mode 100644
index 0000000000000..e22408f060b1f
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDInst.cpp
@@ -0,0 +1,207 @@
+//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's instruction class.
+// The instruction is responsible for vending the string representation,
+// individual tokens, and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDInst.h"
+#include "EDDisassembler.h"
+#include "EDOperand.h"
+#include "EDToken.h"
+
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+
+using namespace llvm;
+
+EDInst::EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *info) :
+ Disassembler(disassembler),
+ Inst(inst),
+ ThisInstInfo(info),
+ ByteSize(byteSize),
+ BranchTarget(-1),
+ MoveSource(-1),
+ MoveTarget(-1) {
+ OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()];
+}
+
+EDInst::~EDInst() {
+ unsigned int index;
+ unsigned int numOperands = Operands.size();
+
+ for (index = 0; index < numOperands; ++index)
+ delete Operands[index];
+
+ unsigned int numTokens = Tokens.size();
+
+ for (index = 0; index < numTokens; ++index)
+ delete Tokens[index];
+
+ delete Inst;
+}
+
+uint64_t EDInst::byteSize() {
+ return ByteSize;
+}
+
+int EDInst::stringify() {
+ if (StringifyResult.valid())
+ return StringifyResult.result();
+
+ if (Disassembler.printInst(String, *Inst))
+ return StringifyResult.setResult(-1);
+
+ return StringifyResult.setResult(0);
+}
+
+int EDInst::getString(const char*& str) {
+ if (stringify())
+ return -1;
+
+ str = String.c_str();
+
+ return 0;
+}
+
+unsigned EDInst::instID() {
+ return Inst->getOpcode();
+}
+
+bool EDInst::isBranch() {
+ if (ThisInstInfo)
+ return
+ ThisInstInfo->instructionType == kInstructionTypeBranch ||
+ ThisInstInfo->instructionType == kInstructionTypeCall;
+ else
+ return false;
+}
+
+bool EDInst::isMove() {
+ if (ThisInstInfo)
+ return ThisInstInfo->instructionType == kInstructionTypeMove;
+ else
+ return false;
+}
+
+int EDInst::parseOperands() {
+ if (ParseResult.valid())
+ return ParseResult.result();
+
+ if (!ThisInstInfo)
+ return ParseResult.setResult(-1);
+
+ unsigned int opIndex;
+ unsigned int mcOpIndex = 0;
+
+ for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) {
+ if (isBranch() &&
+ (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) {
+ BranchTarget = opIndex;
+ }
+ else if (isMove()) {
+ if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource)
+ MoveSource = opIndex;
+ else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)
+ MoveTarget = opIndex;
+ }
+
+ EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex);
+
+ Operands.push_back(operand);
+ }
+
+ return ParseResult.setResult(0);
+}
+
+int EDInst::branchTargetID() {
+ if (parseOperands())
+ return -1;
+ return BranchTarget;
+}
+
+int EDInst::moveSourceID() {
+ if (parseOperands())
+ return -1;
+ return MoveSource;
+}
+
+int EDInst::moveTargetID() {
+ if (parseOperands())
+ return -1;
+ return MoveTarget;
+}
+
+int EDInst::numOperands() {
+ if (parseOperands())
+ return -1;
+ return Operands.size();
+}
+
+int EDInst::getOperand(EDOperand *&operand, unsigned int index) {
+ if (parseOperands())
+ return -1;
+
+ if (index >= Operands.size())
+ return -1;
+
+ operand = Operands[index];
+ return 0;
+}
+
+int EDInst::tokenize() {
+ if (TokenizeResult.valid())
+ return TokenizeResult.result();
+
+ if (stringify())
+ return TokenizeResult.setResult(-1);
+
+ return TokenizeResult.setResult(EDToken::tokenize(Tokens,
+ String,
+ OperandOrder,
+ Disassembler));
+
+}
+
+int EDInst::numTokens() {
+ if (tokenize())
+ return -1;
+ return Tokens.size();
+}
+
+int EDInst::getToken(EDToken *&token, unsigned int index) {
+ if (tokenize())
+ return -1;
+ token = Tokens[index];
+ return 0;
+}
+
+#ifdef __BLOCKS__
+int EDInst::visitTokens(EDTokenVisitor_t visitor) {
+ if (tokenize())
+ return -1;
+
+ tokvec_t::iterator iter;
+
+ for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) {
+ int ret = visitor(*iter);
+ if (ret == 1)
+ return 0;
+ if (ret != 0)
+ return -1;
+ }
+
+ return 0;
+}
+#endif
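
The CachedResult idiom used throughout this file memoizes an int-returning computation: compute once, then remember both the answer and the fact that it was computed. Its essence, as a standalone sketch:

// Compute once; afterwards return the remembered result.
struct Memoized {
  bool Valid;
  int Result;
  Memoized() : Valid(false), Result(0) {}
  int get(int (*compute)(void *ctx), void *ctx) {
    if (!Valid) {
      Result = compute(ctx);
      Valid = true;
    }
    return Result;
  }
};
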
diff --git a/lib/MC/MCDisassembler/EDInst.h b/lib/MC/MCDisassembler/EDInst.h
new file mode 100644
index 0000000000000..39d264fb7aadc
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDInst.h
@@ -0,0 +1,182 @@
+//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// instruction class. The instruction is responsible for vending the string
+// representation, individual tokens and operands for a single instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDINST_H
+#define LLVM_EDINST_H
+
+#include "llvm/System/DataTypes.h"
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+ class MCInst;
+ struct EDInstInfo;
+ struct EDToken;
+ struct EDDisassembler;
+ struct EDOperand;
+
+#ifdef __BLOCKS__
+ typedef int (^EDTokenVisitor_t)(EDToken *token);
+#endif
+
+/// CachedResult - Encapsulates the result of a function along with the validity
+/// of that result, so that slow functions don't need to run twice
+struct CachedResult {
+ /// True if the result has been obtained by executing the function
+ bool Valid;
+ /// The result last obtained from the function
+ int Result;
+
+ /// Constructor - Initializes an invalid result
+ CachedResult() : Valid(false) { }
+ /// valid - Returns true if the result has been obtained by executing the
+ /// function and false otherwise
+ bool valid() { return Valid; }
+ /// result - Returns the result of the function or an undefined value if
+ /// valid() is false
+ int result() { return Result; }
+ /// setResult - Sets the result of the function and declares it valid
+ /// returning the result (so that setResult() can be called from inside a
+ /// return statement)
+ /// @arg result - The result of the function
+ int setResult(int result) { Result = result; Valid = true; return result; }
+};
+
+/// EDInst - Encapsulates a single instruction, which can be queried for its
+/// string representation, as well as its operands and tokens
+struct EDInst {
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+ /// The containing MCInst
+ llvm::MCInst *Inst;
+ /// The instruction information provided by TableGen for this instruction
+ const llvm::EDInstInfo *ThisInstInfo;
+ /// The number of bytes for the machine code representation of the instruction
+ uint64_t ByteSize;
+
+ /// The result of the stringify() function
+ CachedResult StringifyResult;
+ /// The string representation of the instruction
+ std::string String;
+ /// The order in which operands from the InstInfo's operand information appear
+ /// in String
+ const char* OperandOrder;
+
+ /// The result of the parseOperands() function
+ CachedResult ParseResult;
+ typedef llvm::SmallVector<EDOperand*, 5> opvec_t;
+ /// The instruction's operands
+ opvec_t Operands;
+ /// The operand corresponding to the target, if the instruction is a branch
+ int BranchTarget;
+ /// The operand corresponding to the source, if the instruction is a move
+ int MoveSource;
+ /// The operand corresponding to the target, if the instruction is a move
+ int MoveTarget;
+
+ /// The result of the tokenize() function
+ CachedResult TokenizeResult;
+ typedef std::vector<EDToken*> tokvec_t;
+ /// The instruction's tokens
+ tokvec_t Tokens;
+
+ /// Constructor - initializes an instruction given the output of the LLVM
+ /// C++ disassembler
+ ///
+ /// @arg inst - The MCInst, which will now be owned by this object
+ /// @arg byteSize - The size of the consumed instruction, in bytes
+ /// @arg disassembler - The parent disassembler
+ /// @arg instInfo - The instruction information produced by the table
+ /// generator for this instruction
+ EDInst(llvm::MCInst *inst,
+ uint64_t byteSize,
+ EDDisassembler &disassembler,
+ const llvm::EDInstInfo *instInfo);
+ ~EDInst();
+
+ /// byteSize - returns the number of bytes consumed by the machine code
+ /// representation of the instruction
+ uint64_t byteSize();
+ /// instID - returns the LLVM instruction ID of the instruction
+ unsigned instID();
+
+ /// stringify - populates the String and AsmString members of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int stringify();
+ /// getString - retrieves a pointer to the string representation of the
+ /// instruction, returning 0 on success or -1 otherwise
+ ///
+ /// @arg str - A reference to a pointer that, on success, is set to point to
+ /// the string representation of the instruction; this string is still owned
+ /// by the instruction and will be deleted when the instruction is destroyed
+ int getString(const char *&str);
+
+ /// isBranch - Returns true if the instruction is a branch
+ bool isBranch();
+ /// isMove - Returns true if the instruction is a move
+ bool isMove();
+
+ /// parseOperands - populates the Operands member of the instruction,
+ /// returning 0 on success or -1 otherwise
+ int parseOperands();
+ /// branchTargetID - returns the ID (suitable for use with getOperand()) of
+ /// the target operand if the instruction is a branch, or -1 otherwise
+ int branchTargetID();
+ /// moveSourceID - returns the ID of the source operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveSourceID();
+ /// moveTargetID - returns the ID of the target operand if the instruction
+ /// is a move, or -1 otherwise
+ int moveTargetID();
+
+ /// numOperands - returns the number of operands available to retrieve, or -1
+ /// on error
+ int numOperands();
+ /// getOperand - retrieves an operand from the instruction's operand list by
+ /// index, returning 0 on success or -1 on error
+ ///
+ /// @arg operand - A reference whose target is pointed at the operand on
+ /// success, although the operand is still owned by the EDInst
+ /// @arg index - The index of the operand in the instruction
+ int getOperand(EDOperand *&operand, unsigned int index);
+
+ /// tokenize - populates the Tokens member of the instruction, returning 0 on
+ /// success or -1 otherwise
+ int tokenize();
+ /// numTokens - returns the number of tokens in the instruction, or -1 on
+ /// error
+ int numTokens();
+ /// getToken - retrieves a token from the instruction's token list by index,
+ /// returning 0 on success or -1 on error
+ ///
+ /// @arg token - A reference whose target is pointed at the token on success,
+ /// although the token is still owned by the EDInst
+ /// @arg index - The index of the token in the instruction
+ int getToken(EDToken *&token, unsigned int index);
+
+#ifdef __BLOCKS__
+ /// visitTokens - Visits each token in turn and applies a block to it,
+ /// returning 0 if all tokens are visited and/or the block signals
+ /// termination by returning 1; returns -1 on error
+ ///
+ /// @arg visitor - The visitor block to apply to all tokens.
+ int visitTokens(EDTokenVisitor_t visitor);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
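
A short sketch of driving this interface, assuming EDOperand.h is also included: counts come back as int so -1 can signal a parse failure, and returned pointers stay owned by the EDInst.

void walkOperands(llvm::EDInst *inst) {
  int n = inst->numOperands();   // triggers lazy parseOperands(); -1 on error
  for (int i = 0; i < n; ++i) {
    llvm::EDOperand *op;
    if (inst->getOperand(op, i))
      continue;                  // op remains owned by the EDInst
    if (op->isRegister()) {
      // op->regVal() is the LLVM register ID; map it to a name with
      // EDDisassembler::nameWithRegisterID().
    }
  }
}
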
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
new file mode 100644
index 0000000000000..2aed123368dad
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -0,0 +1,282 @@
+//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's operand class. The
+// operand is responsible for allowing evaluation given a particular register
+// context.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDOperand.h"
+#include "EDDisassembler.h"
+#include "EDInst.h"
+#include "llvm/MC/EDInstInfo.h"
+#include "llvm/MC/MCInst.h"
+using namespace llvm;
+
+EDOperand::EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex) :
+ Disassembler(disassembler),
+ Inst(inst),
+ OpIndex(opIndex),
+ MCOpIndex(mcOpIndex) {
+ unsigned int numMCOperands = 0;
+
+ if (Disassembler.Key.Arch == Triple::x86 ||
+ Disassembler.Key.Arch == Triple::x86_64) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
+
+ switch (operandType) {
+ default:
+ break;
+ case kOperandTypeImmediate:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeRegister:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeX86Memory:
+ numMCOperands = 5;
+ break;
+ case kOperandTypeX86EffectiveAddress:
+ numMCOperands = 4;
+ break;
+ case kOperandTypeX86PCRelative:
+ numMCOperands = 1;
+ break;
+ }
+ }
+ else if (Disassembler.Key.Arch == Triple::arm ||
+ Disassembler.Key.Arch == Triple::thumb) {
+ uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex];
+
+ switch (operandType) {
+ default:
+ case kOperandTypeARMRegisterList:
+ break;
+ case kOperandTypeImmediate:
+ case kOperandTypeRegister:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeARMSoImm2Part:
+ case kOperandTypeARMPredicate:
+ case kOperandTypeThumbITMask:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeARMTBAddrMode:
+ case kOperandTypeThumb2AddrModeImm8s4Offset:
+ numMCOperands = 1;
+ break;
+ case kOperandTypeThumb2SoReg:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ numMCOperands = 2;
+ break;
+ case kOperandTypeARMSoReg:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumbAddrModeS1:
+ case kOperandTypeThumbAddrModeS2:
+ case kOperandTypeThumbAddrModeS4:
+ case kOperandTypeARMAddrMode6Offset:
+ numMCOperands = 3;
+ break;
+ case kOperandTypeARMAddrMode6:
+ numMCOperands = 4;
+ break;
+ }
+ }
+
+ mcOpIndex += numMCOperands;
+}
+
+EDOperand::~EDOperand() {
+}
+
+int EDOperand::evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg) {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (Disassembler.Key.Arch) {
+ default:
+ return -1;
+ case Triple::x86:
+ case Triple::x86_64:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg);
+ }
+ case kOperandTypeX86PCRelative:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+ uint64_t ripVal;
+
+ // TODO fix how we do this
+
+ if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg))
+ return -1;
+
+ result = ripVal + displacement;
+ return 0;
+ }
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86EffectiveAddress:
+ {
+ unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm();
+ unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg();
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm();
+ //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg();
+
+ uint64_t addr = 0;
+
+ if (baseReg) {
+ uint64_t baseVal;
+ if (callback(&baseVal, baseReg, arg))
+ return -1;
+ addr += baseVal;
+ }
+
+ if (indexReg) {
+ uint64_t indexVal;
+ if (callback(&indexVal, indexReg, arg))
+ return -1;
+ addr += (scaleAmount * indexVal);
+ }
+
+ addr += displacement;
+
+ result = addr;
+ return 0;
+ }
+ }
+ break;
+ case Triple::arm:
+ case Triple::thumb:
+ switch (operandType) {
+ default:
+ return -1;
+ case kOperandTypeImmediate:
+ result = Inst.Inst->getOperand(MCOpIndex).getImm();
+ return 0;
+ case kOperandTypeRegister:
+ {
+ unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg();
+ return callback(&result, reg, arg);
+ }
+ case kOperandTypeARMBranchTarget:
+ {
+ int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm();
+
+ uint64_t pcVal;
+
+ if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg))
+ return -1;
+
+ result = pcVal + displacement;
+ return 0;
+ }
+ }
+ }
+
+ return -1;
+}
+
+int EDOperand::isRegister() {
+ return (Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeRegister);
+}
+
+unsigned EDOperand::regVal() {
+ return Inst.Inst->getOperand(MCOpIndex).getReg();
+}
+
+int EDOperand::isImmediate() {
+ return (Inst.ThisInstInfo->operandTypes[OpIndex] == kOperandTypeImmediate);
+}
+
+uint64_t EDOperand::immediateVal() {
+ return Inst.Inst->getOperand(MCOpIndex).getImm();
+}
+
+int EDOperand::isMemory() {
+ uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex];
+
+ switch (operandType) {
+ default:
+ return 0;
+ case kOperandTypeX86Memory:
+ case kOperandTypeX86PCRelative:
+ case kOperandTypeX86EffectiveAddress:
+ case kOperandTypeARMSoReg:
+ case kOperandTypeARMSoImm:
+ case kOperandTypeARMAddrMode2:
+ case kOperandTypeARMAddrMode2Offset:
+ case kOperandTypeARMAddrMode3:
+ case kOperandTypeARMAddrMode3Offset:
+ case kOperandTypeARMAddrMode4:
+ case kOperandTypeARMAddrMode5:
+ case kOperandTypeARMAddrMode6:
+ case kOperandTypeARMAddrModePC:
+ case kOperandTypeARMBranchTarget:
+ case kOperandTypeThumbAddrModeS1:
+ case kOperandTypeThumbAddrModeS2:
+ case kOperandTypeThumbAddrModeS4:
+ case kOperandTypeThumbAddrModeRR:
+ case kOperandTypeThumbAddrModeSP:
+ case kOperandTypeThumb2SoImm:
+ case kOperandTypeThumb2AddrModeImm8:
+ case kOperandTypeThumb2AddrModeImm8Offset:
+ case kOperandTypeThumb2AddrModeImm12:
+ case kOperandTypeThumb2AddrModeSoReg:
+ case kOperandTypeThumb2AddrModeImm8s4:
+ return 1;
+ }
+}
+
+#ifdef __BLOCKS__
+struct RegisterReaderWrapper {
+ EDOperand::EDRegisterBlock_t regBlock;
+};
+
+int readerWrapperCallback(uint64_t *value,
+ unsigned regID,
+ void *arg) {
+ struct RegisterReaderWrapper *wrapper = (struct RegisterReaderWrapper *)arg;
+ return wrapper->regBlock(value, regID);
+}
+
+int EDOperand::evaluate(uint64_t &result,
+ EDRegisterBlock_t regBlock) {
+ struct RegisterReaderWrapper wrapper;
+ wrapper.regBlock = regBlock;
+ return evaluate(result,
+ readerWrapperCallback,
+ (void*)&wrapper);
+}
+#endif
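
Traced on a concrete x86 memory operand, the evaluate() arithmetic above works out as follows (register values invented for the example):

//   movq 16(%rdi,%rsi,8), %rax
//   memory operand: baseReg=RDI, scaleAmount=8, indexReg=RSI, displacement=16
//   callback supplies RDI=0x1000, RSI=0x3
//   addr = 0x1000 + 8*0x3 + 16 = 0x1028   -> written to `result`, returns 0
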
diff --git a/lib/MC/MCDisassembler/EDOperand.h b/lib/MC/MCDisassembler/EDOperand.h
new file mode 100644
index 0000000000000..6e695224318c5
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDOperand.h
@@ -0,0 +1,91 @@
+//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's
+// operand class. The operand is responsible for allowing evaluation given a
+// particular register context.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDOPERAND_H
+#define LLVM_EDOPERAND_H
+
+#include "llvm/System/DataTypes.h"
+
+namespace llvm {
+
+struct EDDisassembler;
+struct EDInst;
+
+typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
+ void* arg);
+
+
+/// EDOperand - Encapsulates a single operand, which can be evaluated by the
+/// client
+struct EDOperand {
+ /// The parent disassembler
+ const EDDisassembler &Disassembler;
+ /// The parent instruction
+ const EDInst &Inst;
+
+ /// The index of the operand in the EDInst
+ unsigned int OpIndex;
+ /// The index of the first component of the operand in the MCInst
+ unsigned int MCOpIndex;
+
+ /// Constructor - Initializes an EDOperand
+ ///
+ /// @arg disassembler - The disassembler responsible for the operand
+ /// @arg inst - The instruction containing this operand
+ /// @arg opIndex - The index of the operand in inst
+ /// @arg mcOpIndex - The index of the operand in the original MCInst
+ EDOperand(const EDDisassembler &disassembler,
+ const EDInst &inst,
+ unsigned int opIndex,
+ unsigned int &mcOpIndex);
+ ~EDOperand();
+
+ /// evaluate - Returns the numeric value of an operand to the extent possible,
+ /// returning 0 on success or -1 if there was some problem (such as a
+ /// register not being readable)
+ ///
+ /// @arg result - A reference whose target is filled in with the value of
+ /// the operand (the address if it is a memory operand)
+ /// @arg callback - A function to call to obtain register values
+ /// @arg arg - An opaque argument to pass to callback
+ int evaluate(uint64_t &result,
+ EDRegisterReaderCallback callback,
+ void *arg);
+
+ /// isRegister - Returns 1 if the operand is a register or 0 otherwise
+ int isRegister();
+ /// regVal - Returns the register value.
+ unsigned regVal();
+
+ /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise
+ int isImmediate();
+ /// immediateVal - Returns the immediate value.
+ uint64_t immediateVal();
+
+ /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise
+ int isMemory();
+
+#ifdef __BLOCKS__
+ typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
+
+ /// evaluate - Like evaluate for a callback, but uses a block instead
+ int evaluate(uint64_t &result,
+ EDRegisterBlock_t regBlock);
+#endif
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp
new file mode 100644
index 0000000000000..400e1649e9709
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDToken.cpp
@@ -0,0 +1,206 @@
+//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembler library's token class. The
+// token is responsible for vending information about the token, such as its
+// type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#include "EDToken.h"
+#include "EDDisassembler.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+EDToken::EDToken(StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler) :
+ Disassembler(disassembler),
+ Str(str),
+ Type(type),
+ LocalType(localType),
+ OperandID(-1) {
+}
+
+EDToken::~EDToken() {
+}
+
+void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) {
+ Type = kTokenLiteral;
+ LiteralSign = sign;
+ LiteralAbsoluteValue = absoluteValue;
+}
+
+void EDToken::makeRegister(unsigned registerID) {
+ Type = kTokenRegister;
+ RegisterID = registerID;
+}
+
+void EDToken::setOperandID(int operandID) {
+ OperandID = operandID;
+}
+
+enum EDToken::tokenType EDToken::type() const {
+ return Type;
+}
+
+uint64_t EDToken::localType() const {
+ return LocalType;
+}
+
+StringRef EDToken::string() const {
+ return Str;
+}
+
+int EDToken::operandID() const {
+ return OperandID;
+}
+
+int EDToken::literalSign() const {
+ if (Type != kTokenLiteral)
+ return -1;
+ return (LiteralSign ? 1 : 0);
+}
+
+int EDToken::literalAbsoluteValue(uint64_t &value) const {
+ if (Type != kTokenLiteral)
+ return -1;
+ value = LiteralAbsoluteValue;
+ return 0;
+}
+
+int EDToken::registerID(unsigned &registerID) const {
+ if (Type != kTokenRegister)
+ return -1;
+ registerID = RegisterID;
+ return 0;
+}
+
+int EDToken::tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler) {
+ SmallVector<MCParsedAsmOperand*, 5> parsedOperands;
+ SmallVector<AsmToken, 10> asmTokens;
+
+ if (disassembler.parseInst(parsedOperands, asmTokens, str))
+ return -1;
+
+ SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator;
+ unsigned int operandIndex;
+ SmallVectorImpl<AsmToken>::iterator tokenIterator;
+
+ operandIterator = parsedOperands.begin();
+ operandIndex = 0;
+
+ bool readOpcode = false;
+
+ const char *wsPointer = asmTokens.begin()->getLoc().getPointer();
+
+ for (tokenIterator = asmTokens.begin();
+ tokenIterator != asmTokens.end();
+ ++tokenIterator) {
+ SMLoc tokenLoc = tokenIterator->getLoc();
+
+ const char *tokenPointer = tokenLoc.getPointer();
+
+ if (tokenPointer > wsPointer) {
+ unsigned long wsLength = tokenPointer - wsPointer;
+
+ EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength),
+ EDToken::kTokenWhitespace,
+ 0,
+ disassembler);
+
+ tokens.push_back(whitespaceToken);
+ }
+
+ wsPointer = tokenPointer + tokenIterator->getString().size();
+
+ while (operandIterator != parsedOperands.end() &&
+ tokenLoc.getPointer() >
+ (*operandIterator)->getEndLoc().getPointer()) {
+ ++operandIterator;
+ ++operandIndex;
+ }
+
+ EDToken *token;
+
+ switch (tokenIterator->getKind()) {
+ case AsmToken::Identifier:
+ if (!readOpcode) {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenOpcode,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+ readOpcode = true;
+ break;
+ }
+      // Any identifier that isn't an opcode is mere punctuation, so we fall
+      // through.
+ default:
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenPunctuation,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+ break;
+ case AsmToken::Integer:
+ {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenLiteral,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+
+ int64_t intVal = tokenIterator->getIntVal();
+
+ if (intVal < 0)
+ token->makeLiteral(true, -intVal);
+ else
+ token->makeLiteral(false, intVal);
+ break;
+ }
+ case AsmToken::Register:
+ {
+ token = new EDToken(tokenIterator->getString(),
+ EDToken::kTokenLiteral,
+ (uint64_t)tokenIterator->getKind(),
+ disassembler);
+
+ token->makeRegister((unsigned)tokenIterator->getRegVal());
+ break;
+ }
+ }
+
+ if (operandIterator != parsedOperands.end() &&
+ tokenLoc.getPointer() >=
+ (*operandIterator)->getStartLoc().getPointer()) {
+      // operandIndex == 0 means the operand is the instruction (which the
+      // AsmParser treats as an operand but edis does not).  We therefore skip
+      // operandIndex == 0 and subtract 1 from all other operand indices.
+
+ if (operandIndex > 0)
+ token->setOperandID(operandOrder[operandIndex - 1]);
+ }
+
+ tokens.push_back(token);
+ }
+
+ return 0;
+}
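+
+// Illustration (not part of the original patch): for an AT&T-syntax string
+// such as "addl $1, %eax", the loop above yields roughly
+//   kTokenOpcode("addl"), kTokenWhitespace(" "), kTokenPunctuation("$"),
+//   kTokenLiteral(1), kTokenPunctuation(","), kTokenWhitespace(" "),
+//   kTokenRegister("%eax")
+// with each token that overlaps an operand linked back to it via
+// operandOrder in setOperandID().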
+
+int EDToken::getString(const char*& buf) {
+ if (PermStr.length() == 0) {
+ PermStr = Str.str();
+ }
+ buf = PermStr.c_str();
+ return 0;
+}
diff --git a/lib/MC/MCDisassembler/EDToken.h b/lib/MC/MCDisassembler/EDToken.h
new file mode 100644
index 0000000000000..6b2aeac60ba51
--- /dev/null
+++ b/lib/MC/MCDisassembler/EDToken.h
@@ -0,0 +1,139 @@
+//===- EDToken.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for the Enhanced Disassembly library's token
+// class. The token is responsible for vending information about the token,
+// such as its type and logical value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDTOKEN_H
+#define LLVM_EDTOKEN_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/System/DataTypes.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+struct EDDisassembler;
+
+/// EDToken - Encapsulates a single token, which can provide a string
+/// representation of itself or interpret itself in various ways, depending
+/// on the token type.
+struct EDToken {
+ enum tokenType {
+ kTokenWhitespace,
+ kTokenOpcode,
+ kTokenLiteral,
+ kTokenRegister,
+ kTokenPunctuation
+ };
+
+ /// The parent disassembler
+ EDDisassembler &Disassembler;
+
+ /// The token's string representation
+ llvm::StringRef Str;
+ /// The token's string representation, but in a form suitable for export
+ std::string PermStr;
+ /// The type of the token, as exposed through the external API
+ enum tokenType Type;
+ /// The type of the token, as recorded by the syntax-specific tokenizer
+ uint64_t LocalType;
+  /// The index of the operand corresponding to the token, or -1 if the token
+  /// is not part of an operand.
+ int OperandID;
+
+ /// The sign if the token is a literal (1 if negative, 0 otherwise)
+ bool LiteralSign;
+ /// The absolute value if the token is a literal
+ uint64_t LiteralAbsoluteValue;
+ /// The LLVM register ID if the token is a register name
+ unsigned RegisterID;
+
+ /// Constructor - Initializes an EDToken with the information common to all
+ /// tokens
+ ///
+ /// @arg str - The string corresponding to the token
+ /// @arg type - The token's type as exposed through the public API
+ /// @arg localType - The token's type as recorded by the tokenizer
+ /// @arg disassembler - The disassembler responsible for the token
+ EDToken(llvm::StringRef str,
+ enum tokenType type,
+ uint64_t localType,
+ EDDisassembler &disassembler);
+
+  /// makeLiteral - Adds the information specific to a literal
+  ///
+  /// @arg sign - The sign of the literal (1 if negative, 0 otherwise)
+  /// @arg absoluteValue - The absolute value of the literal
+ void makeLiteral(bool sign, uint64_t absoluteValue);
+ /// makeRegister - Adds the information specific to a register
+ ///
+ /// @arg registerID - The LLVM register ID
+ void makeRegister(unsigned registerID);
+
+ /// setOperandID - Links the token to a numbered operand
+ ///
+ /// @arg operandID - The operand ID to link to
+ void setOperandID(int operandID);
+
+ ~EDToken();
+
+ /// type - Returns the public type of the token
+ enum tokenType type() const;
+ /// localType - Returns the tokenizer-specific type of the token
+ uint64_t localType() const;
+ /// string - Returns the string representation of the token
+ llvm::StringRef string() const;
+ /// operandID - Returns the operand ID of the token
+ int operandID() const;
+
+ /// literalSign - Returns the sign of the token
+ /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal)
+ int literalSign() const;
+ /// literalAbsoluteValue - Retrieves the absolute value of the token, and
+ /// returns -1 if the token is not a literal
+ /// @arg value - A reference to a value that is filled in with the absolute
+ /// value, if it is valid
+ int literalAbsoluteValue(uint64_t &value) const;
+ /// registerID - Retrieves the register ID of the token, and returns -1 if the
+ /// token is not a register
+ ///
+ /// @arg registerID - A reference to a value that is filled in with the
+ /// register ID, if it is valid
+ int registerID(unsigned &registerID) const;
+
+ /// tokenize - Tokenizes a string using the platform- and syntax-specific
+ /// tokenizer, and returns 0 on success (-1 on failure)
+ ///
+ /// @arg tokens - A vector that will be filled in with pointers to
+ /// allocated tokens
+ /// @arg str - The string, as outputted by the AsmPrinter
+ /// @arg operandOrder - The order of the operands from the operandFlags array
+ /// as they appear in str
+ /// @arg disassembler - The disassembler for the desired target and
+  ///   assembly syntax
+ static int tokenize(std::vector<EDToken*> &tokens,
+ std::string &str,
+ const char *operandOrder,
+ EDDisassembler &disassembler);
+
+ /// getString - Directs a character pointer to the string, returning 0 on
+ /// success (-1 on failure)
+ /// @arg buf - A reference to a pointer that is set to point to the string.
+ /// The string is still owned by the token.
+ int getString(const char*& buf);
+};
+
+} // end namespace llvm
+#endif
diff --git a/lib/Target/MSIL/Makefile b/lib/MC/MCDisassembler/Makefile
index 70eadb32e3606..7d71cd381a7c3 100644
--- a/lib/Target/MSIL/Makefile
+++ b/lib/MC/MCDisassembler/Makefile
@@ -1,16 +1,14 @@
-##===- lib/Target/MSIL/Makefile ----------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
+##===- lib/MC/MCDisassembler/Makefile ----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
-#
+#
##===----------------------------------------------------------------------===##
LEVEL = ../../..
-LIBRARYNAME = LLVMMSIL
-DIRS = TargetInfo
+LIBRARYNAME = LLVMMCDisassembler
include $(LEVEL)/Makefile.common
-CompileCommonOpts := $(CompileCommonOpts) -Wno-format
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
new file mode 100644
index 0000000000000..2da71f96c676a
--- /dev/null
+++ b/lib/MC/MCDwarf.cpp
@@ -0,0 +1,21 @@
+//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCDwarfFile::print(raw_ostream &OS) const {
+ OS << '"' << getName() << '"';
+}
+
+void MCDwarfFile::dump() const {
+ print(dbgs());
+}
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
new file mode 100644
index 0000000000000..570c3917ab46f
--- /dev/null
+++ b/lib/MC/MCELFStreamer.cpp
@@ -0,0 +1,408 @@
+//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file assembles .s files and emits ELF .o object files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCELFSymbolFlags.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
+using namespace llvm;
+
+namespace {
+
+class MCELFStreamer : public MCObjectStreamer {
+ void EmitInstToFragment(const MCInst &Inst);
+ void EmitInstToData(const MCInst &Inst);
+public:
+ MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
+
+ ~MCELFStreamer() {}
+
+ /// @name MCStreamer Interface
+ /// @{
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitCOFFSymbolType(int Type) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EndCOFFSymbolDef() {
+ assert(0 && "ELF doesn't support this directive");
+ }
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setSize(Value);
+ }
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
+ virtual void EmitGPRel32Value(const MCExpr *Value) {
+ assert(0 && "ELF doesn't support this directive");
+ }
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0);
+
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ DEBUG(dbgs() << "FIXME: MCELFStreamer:EmitDwarfFileDirective not implemented\n");
+ }
+
+ virtual void EmitInstruction(const MCInst &Inst);
+ virtual void Finish();
+
+ /// @}
+};
+
+} // end anonymous namespace.
+
+void MCELFStreamer::EmitLabel(MCSymbol *Symbol) {
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+
+ // FIXME: This is wasteful, we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists, otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(F);
+ SD.setOffset(F->getContents().size());
+
+ Symbol->setSection(*CurSection);
+}
+
+void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ case MCAF_SubsectionsViaSymbols:
+ getAssembler().setSubsectionsViaSymbols(true);
+ return;
+ }
+
+ assert(0 && "invalid assembler flag!");
+}
+
+void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ // FIXME: Lift context changes into super class.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
+}
+
+void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+ // Indirect symbols are handled differently, to match how 'as' handles
+ // them. This makes writing matching .o files easier.
+ if (Attribute == MCSA_IndirectSymbol) {
+ // Note that we intentionally cannot use the symbol data here; this is
+ // important for matching the string table that 'as' generates.
+ IndirectSymbolData ISD;
+ ISD.Symbol = Symbol;
+ ISD.SectionData = getCurrentSectionData();
+ getAssembler().getIndirectSymbols().push_back(ISD);
+ return;
+ }
+
+ // Adding a symbol attribute always introduces the symbol, note that an
+ // important side effect of calling getOrCreateSymbolData here is to register
+ // the symbol with the assembler.
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // The implementation of symbol attributes is designed to match 'as', but it
+  // leaves much to be desired. It doesn't really make sense to arbitrarily
+  // add and remove flags, but 'as' allows this (in particular, see .desc).
+ //
+  // In the future it might be worth trying to make these operations better
+  // defined.
+ switch (Attribute) {
+ case MCSA_LazyReference:
+ case MCSA_Reference:
+ case MCSA_NoDeadStrip:
+ case MCSA_PrivateExtern:
+ case MCSA_WeakDefinition:
+ case MCSA_WeakDefAutoPrivate:
+ case MCSA_Invalid:
+ case MCSA_ELF_TypeIndFunction:
+ case MCSA_IndirectSymbol:
+ assert(0 && "Invalid symbol attribute for ELF!");
+ break;
+
+ case MCSA_Global:
+ SD.setFlags(SD.getFlags() | ELF_STB_Global);
+ SD.setExternal(true);
+ break;
+
+ case MCSA_WeakReference:
+ case MCSA_Weak:
+ SD.setFlags(SD.getFlags() | ELF_STB_Weak);
+ break;
+
+ case MCSA_Local:
+ SD.setFlags(SD.getFlags() | ELF_STB_Local);
+ break;
+
+ case MCSA_ELF_TypeFunction:
+ SD.setFlags(SD.getFlags() | ELF_STT_Func);
+ break;
+
+ case MCSA_ELF_TypeObject:
+ SD.setFlags(SD.getFlags() | ELF_STT_Object);
+ break;
+
+ case MCSA_ELF_TypeTLS:
+ SD.setFlags(SD.getFlags() | ELF_STT_Tls);
+ break;
+
+ case MCSA_ELF_TypeCommon:
+ SD.setFlags(SD.getFlags() | ELF_STT_Common);
+ break;
+
+ case MCSA_ELF_TypeNoType:
+ SD.setFlags(SD.getFlags() | ELF_STT_Notype);
+ break;
+
+ case MCSA_Protected:
+ SD.setFlags(SD.getFlags() | ELF_STV_Protected);
+ break;
+
+ case MCSA_Hidden:
+ SD.setFlags(SD.getFlags() | ELF_STV_Hidden);
+ break;
+
+ case MCSA_Internal:
+ SD.setFlags(SD.getFlags() | ELF_STV_Internal);
+ break;
+ }
+}
+
+void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ if ((SD.getFlags() & (0xf << ELF_STB_Shift)) == ELF_STB_Local) {
+ const MCSection *Section = getAssembler().getContext().getELFSection(".bss",
+ MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE |
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section);
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
+ SD.setFragment(F);
+ Symbol->setSection(*Section);
+ SD.setSize(MCConstantExpr::Create(Size, getContext()));
+ }
+
+ SD.setFlags(SD.getFlags() | ELF_STB_Global);
+ SD.setExternal(true);
+
+ SD.setCommon(Size, ByteAlignment);
+}
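+
+// (Illustration, not part of the original change: for a directive such as
+// ".comm buf,8,4", the code above marks buf external and records it as a
+// common symbol with size 8 and alignment 4; a symbol that already has local
+// binding is additionally backed by a zero-filled .bss fill fragment.)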
+
+void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void MCELFStreamer::EmitValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ // Avoid fixups when possible.
+ int64_t AbsValue;
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
+ // FIXME: Endianness assumption.
+ for (unsigned i = 0; i != Size; ++i)
+ DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
+ } else {
+ DF->addFixup(MCFixup::Create(DF->getContents().size(), AddValueSymbols(Value),
+ MCFixup::getKindForSize(Size)));
+ DF->getContents().resize(DF->getContents().size() + Size, 0);
+ }
+}
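+
+// (Illustration, not part of the original change: with Size == 4 and an
+// absolute value of 0x11223344, the loop above appends 0x44, 0x33, 0x22,
+// 0x11, i.e. little-endian byte order; hence the endianness FIXME.)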
+
+void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value, unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void MCELFStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value) {
+ // TODO: This is exactly the same as MCMachOStreamer. Consider merging into
+ // MCObjectStreamer.
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
+}
+
+// Add a symbol for the file name of this module. This is the second
+// entry in the module's symbol table (the first being the null symbol).
+void MCELFStreamer::EmitFileDirective(StringRef Filename) {
+ MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename);
+ Symbol->setSection(*CurSection);
+ Symbol->setAbsolute();
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default);
+}
+
+void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
+
+ // Add the fixups and data.
+ //
+ // FIXME: Revisit this design decision when relaxation is done, we may be
+ // able to get away with not storing any extra data in the MCInst.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ IF->getCode() = Code;
+ IF->getFixups() = Fixups;
+}
+
+void MCELFStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
+
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
+ }
+ DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCELFStreamer::EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = 0; i != Inst.getNumOperands(); ++i)
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+
+ getCurrentSectionData()->setHasInstructions(true);
+
+ // If this instruction doesn't need relaxation, just emit it as data.
+ if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
+ EmitInstToData(Inst);
+ return;
+ }
+
+ // Otherwise, if we are relaxing everything, relax the instruction as much as
+ // possible and emit it as data.
+ if (getAssembler().getRelaxAll()) {
+ MCInst Relaxed;
+ getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
+ while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
+ getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
+ EmitInstToData(Relaxed);
+ return;
+ }
+
+ // Otherwise emit to a separate fragment.
+ EmitInstToFragment(Inst);
+}
+
+void MCELFStreamer::Finish() {
+ getAssembler().Finish();
+}
+
+MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 44bc267c11c29..671874df2c69e 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -18,6 +18,8 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCMachOSymbolFlags.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmBackend.h"
@@ -28,58 +30,19 @@ namespace {
class MCMachOStreamer : public MCObjectStreamer {
private:
- MCFragment *getCurrentFragment() const {
- assert(getCurrentSectionData() && "No current section!");
-
- if (!getCurrentSectionData()->empty())
- return &getCurrentSectionData()->getFragmentList().back();
-
- return 0;
- }
-
- /// Get a data fragment to write into, creating a new one if the current
- /// fragment is not a data fragment.
- MCDataFragment *getOrCreateDataFragment() const {
- MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
- if (!F)
- F = new MCDataFragment(getCurrentSectionData());
- return F;
- }
-
void EmitInstToFragment(const MCInst &Inst);
void EmitInstToData(const MCInst &Inst);
+  // FIXME: These will likely be moved to a better place.
+ void MakeLineEntryForSection(const MCSection *Section);
+ const MCExpr * MakeStartMinusEndExpr(MCSymbol *Start, MCSymbol *End,
+ int IntVal);
+ void EmitDwarfFileTable(void);
public:
MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter)
: MCObjectStreamer(Context, TAB, OS, Emitter) {}
- const MCExpr *AddValueSymbols(const MCExpr *Value) {
- switch (Value->getKind()) {
- case MCExpr::Target: assert(0 && "Can't handle target exprs yet!");
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbols(BE->getLHS());
- AddValueSymbols(BE->getRHS());
- break;
- }
-
- case MCExpr::SymbolRef:
- getAssembler().getOrCreateSymbolData(
- cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
- break;
- }
-
- return Value;
- }
-
/// @name MCStreamer Interface
/// @{
@@ -126,10 +89,16 @@ public:
unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename) {
- report_fatal_error("unsupported directive: '.file'");
+ // FIXME: Just ignore the .file; it isn't important enough to fail the
+ // entire assembly.
+
+ //report_fatal_error("unsupported directive: '.file'");
}
virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
- report_fatal_error("unsupported directive: '.file'");
+ // FIXME: Just ignore the .file; it isn't important enough to fail the
+ // entire assembly.
+
+ //report_fatal_error("unsupported directive: '.file'");
}
virtual void EmitInstruction(const MCInst &Inst);
@@ -142,6 +111,8 @@ public:
} // end anonymous namespace.
void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
+ // TODO: This is almost exactly the same as WinCOFFStreamer. Consider merging
+ // into MCObjectStreamer.
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(CurSection && "Cannot emit before setting section!");
@@ -185,6 +156,8 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
}
void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
// FIXME: Lift context changes into super class.
getAssembler().getOrCreateSymbolData(*Symbol);
Symbol->setVariableValue(AddValueSymbols(Value));
@@ -335,11 +308,15 @@ void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
}
void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
unsigned AddrSpace) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
MCDataFragment *DF = getOrCreateDataFragment();
// Avoid fixups when possible.
@@ -359,6 +336,8 @@ void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
int64_t Value, unsigned ValueSize,
unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
@@ -371,6 +350,8 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {
+ // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into
+ // MCObjectStreamer.
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
@@ -429,6 +410,10 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
getCurrentSectionData()->setHasInstructions(true);
+ // Now that a machine instruction has been assembled into this section, make
+ // a line entry for any .loc directive that has been seen.
+ MakeLineEntryForSection(getCurrentSection());
+
// If this instruction doesn't need relaxation, just emit it as data.
if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
EmitInstToData(Inst);
@@ -450,7 +435,207 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
EmitInstToFragment(Inst);
}
+//
+// This is called when an instruction is assembled into the specified section.
+// If there is information from the last .loc directive that has not yet had a
+// line entry made for it, one is made here.
+//
+void MCMachOStreamer::MakeLineEntryForSection(const MCSection *Section) {
+ if (!getContext().getDwarfLocSeen())
+ return;
+
+  // Create a symbol in the current section for use in the line entry.
+ MCSymbol *LineSym = getContext().CreateTempSymbol();
+ // Set the value of the symbol to use for the MCLineEntry.
+ EmitLabel(LineSym);
+
+ // Get the current .loc info saved in the context.
+ const MCDwarfLoc &DwarfLoc = getContext().getCurrentDwarfLoc();
+
+ // Create a (local) line entry with the symbol and the current .loc info.
+ MCLineEntry LineEntry(LineSym, DwarfLoc);
+
+  // Clear DwarfLocSeen, as the current .loc info has now been consumed.
+ getContext().clearDwarfLocSeen();
+
+  // Get the MCLineSection for this section; if one does not exist for this
+  // section, create it.
+ DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ getContext().getMCLineSections();
+ MCLineSection *LineSection = MCLineSections[Section];
+ if (!LineSection) {
+    // Create a new MCLineSection.  It will be used to create the dwarf line
+    // table and is deleted afterwards, when EmitDwarfFileTable() iterates
+    // through the MCLineSections DenseMap.
+ LineSection = new MCLineSection;
+ // Save a pointer to the new LineSection into the MCLineSections DenseMap.
+ MCLineSections[Section] = LineSection;
+ }
+
+ // Add the line entry to this section's entries.
+ LineSection->addLineEntry(LineEntry);
+}
+
+//
+// This helper routine returns an expression of End - Start - IntVal for use
+// by EmitDwarfFileTable() below.
+//
+const MCExpr * MCMachOStreamer::MakeStartMinusEndExpr(MCSymbol *Start,
+ MCSymbol *End,
+ int IntVal) {
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(End, Variant, getContext());
+ const MCExpr *RHS =
+ MCSymbolRefExpr::Create(Start, Variant, getContext());
+ const MCExpr *Res1 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS,getContext());
+ const MCExpr *Res2 =
+ MCConstantExpr::Create(IntVal, getContext());
+ const MCExpr *Res3 =
+ MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, getContext());
+ return Res3;
+}
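+
+// For example (illustrative): the call MakeStartMinusEndExpr(LineStartSym,
+// LineEndSym, 4) below yields (LineEndSym - LineStartSym) - 4, i.e. the
+// length of the section contents minus the 4-byte length field itself, which
+// is exactly what the DWARF unit length field must contain.  (Despite its
+// name, the helper computes End minus Start.)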
+
+//
+// This emits the Dwarf file (and eventually the line) table.
+//
+void MCMachOStreamer::EmitDwarfFileTable(void) {
+ // For now make sure we don't put out the Dwarf file table if no .file
+ // directives were seen.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
+ if (MCDwarfFiles.size() == 0)
+ return;
+
+  // This is the Mach-O section; for ELF it is the .debug_line section.
+ SwitchSection(getContext().getMachOSection("__DWARF", "__debug_line",
+ MCSectionMachO::S_ATTR_DEBUG,
+ 0, SectionKind::getDataRelLocal()));
+
+ // Create a symbol at the beginning of this section.
+ MCSymbol *LineStartSym = getContext().CreateTempSymbol();
+ // Set the value of the symbol, as we are at the start of the section.
+ EmitLabel(LineStartSym);
+
+ // Create a symbol for the end of the section (to be set when we get there).
+ MCSymbol *LineEndSym = getContext().CreateTempSymbol();
+
+  // The first 4 bytes are the total length of the information for this
+ // compilation unit (not including these 4 bytes for the length).
+ EmitValue(MakeStartMinusEndExpr(LineStartSym, LineEndSym, 4), 4, 0);
+
+  // The next 2 bytes are the version, which is Dwarf 2.
+ EmitIntValue(2, 2);
+
+ // Create a symbol for the end of the prologue (to be set when we get there).
+ MCSymbol *ProEndSym = getContext().CreateTempSymbol(); // Lprologue_end
+
+  // The next 4 bytes are the length of the prologue, measured from the end of
+  // this field to the end of the prologue; that is, not including the 4 bytes
+  // for the total length, the 2 bytes for the version, or these 4 bytes for
+  // the length of the prologue itself.
+ EmitValue(MakeStartMinusEndExpr(LineStartSym, ProEndSym, (4 + 2 + 4)), 4, 0);
+
+  // Parameters of the state machine are next.
+  // Define the architecture-dependent minimum instruction length (in
+  // bytes).  This value should rather be too small than too big.
+ // DWARF2_LINE_MIN_INSN_LENGTH
+ EmitIntValue(1, 1);
+ // Flag that indicates the initial value of the is_stmt_start flag.
+ // DWARF2_LINE_DEFAULT_IS_STMT
+ EmitIntValue(1, 1);
+  // Minimum line offset in a special line info. opcode.  This value
+  // was chosen to give a reasonable range of values.
+ // DWARF2_LINE_BASE
+ EmitIntValue(uint64_t(-5), 1);
+ // Range of line offsets in a special line info. opcode.
+ // DWARF2_LINE_RANGE
+ EmitIntValue(14, 1);
+ // First special line opcode - leave room for the standard opcodes.
+ // DWARF2_LINE_OPCODE_BASE
+ EmitIntValue(13, 1);
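+
+  // A worked example of these parameters (illustrative only; nothing extra is
+  // emitted here): a special opcode encodes a (line, address) advance as
+  //   opcode = (line_delta - line_base) + (line_range * addr_delta)
+  //            + opcode_base
+  // so with line_base = -5, line_range = 14 and opcode_base = 13, advancing
+  // the line by 1 and the address by 2 units gives (1 + 5) + (14 * 2) + 13,
+  // i.e. the single opcode byte 47.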
+
+ // Standard opcode lengths
+ EmitIntValue(0, 1); // length of DW_LNS_copy
+ EmitIntValue(1, 1); // length of DW_LNS_advance_pc
+ EmitIntValue(1, 1); // length of DW_LNS_advance_line
+ EmitIntValue(1, 1); // length of DW_LNS_set_file
+ EmitIntValue(1, 1); // length of DW_LNS_set_column
+ EmitIntValue(0, 1); // length of DW_LNS_negate_stmt
+ EmitIntValue(0, 1); // length of DW_LNS_set_basic_block
+ EmitIntValue(0, 1); // length of DW_LNS_const_add_pc
+ EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc
+ EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end
+ EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin
+  EmitIntValue(1, 1); // length of DW_LNS_set_isa
+
+ // Put out the directory and file tables.
+
+ // First the directory table.
+ const std::vector<StringRef> &MCDwarfDirs =
+ getContext().getMCDwarfDirs();
+ for (unsigned i = 0; i < MCDwarfDirs.size(); i++) {
+ EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName
+ EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
+ }
+ EmitIntValue(0, 1); // Terminate the directory list
+
+ // Second the file table.
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+ EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName
+ EmitBytes(StringRef("\0", 1), 0); // the null termination of the string
+ // FIXME the Directory number should be a .uleb128 not a .byte
+ EmitIntValue(MCDwarfFiles[i]->getDirIndex(), 1);
+ EmitIntValue(0, 1); // last modification timestamp (always 0)
+ EmitIntValue(0, 1); // filesize (always 0)
+ }
+ EmitIntValue(0, 1); // Terminate the file list
+
+ // This is the end of the prologue, so set the value of the symbol at the
+ // end of the prologue (that was used in a previous expression).
+ EmitLabel(ProEndSym);
+
+ // TODO: This is the point where the line tables would be emitted.
+
+ // Delete the MCLineSections that were created in
+ // MCMachOStreamer::MakeLineEntryForSection() and used to emit the line
+ // tables.
+ DenseMap<const MCSection *, MCLineSection *> &MCLineSections =
+ getContext().getMCLineSections();
+ for (DenseMap<const MCSection *, MCLineSection *>::iterator it =
+ MCLineSections.begin(), ie = MCLineSections.end(); it != ie; ++it) {
+ delete it->second;
+ }
+
+  // If no line tables were emitted, then emit the following
+  // DW_LNE_set_address sequence to set the address to zero.
+  // TODO: test for 32-bit or 64-bit output.
+  // This is the sequence for 32-bit code.
+  EmitIntValue(0, 1); // DW_LNS_extended_op
+  EmitIntValue(5, 1); // ULEB128 length: 1 opcode byte plus a 4-byte address
+  EmitIntValue(2, 1); // DW_LNE_set_address
+  EmitIntValue(0, 1); // the 4-byte address of zero
+  EmitIntValue(0, 1);
+  EmitIntValue(0, 1);
+  EmitIntValue(0, 1);
+
+  // Lastly emit the DW_LNE_end_sequence, which consists of 3 bytes '00 01 01'
+  // (00 is the code for extended opcodes, 01 is the ULEB128 length of the
+  // extended opcode, and 01 is the DW_LNE_end_sequence opcode).
+ EmitIntValue(0, 1); // DW_LNS_extended_op
+ EmitIntValue(1, 1); // ULEB128 length of the extended opcode
+ EmitIntValue(1, 1); // DW_LNE_end_sequence
+
+ // This is the end of the section, so set the value of the symbol at the end
+ // of this section (that was used in a previous expression).
+ EmitLabel(LineEndSym);
+}
+
void MCMachOStreamer::Finish() {
+ // Dump out the dwarf file and directory tables (soon to include line table)
+ EmitDwarfFileTable();
+
// We have to set the fragment atom associations so we can relax properly for
// Mach-O.
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 5332ade211535..f7a2f20ca4bc3 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -26,6 +26,7 @@ namespace {
/// @{
virtual void SwitchSection(const MCSection *Section) {
+ PrevSection = CurSection;
CurSection = Section;
}
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index d3f7f7783ffa7..2b2385ef9156d 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -9,7 +9,11 @@
#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
@@ -21,15 +25,59 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
}
MCObjectStreamer::~MCObjectStreamer() {
+ delete &Assembler->getBackend();
+ delete &Assembler->getEmitter();
delete Assembler;
}
+MCFragment *MCObjectStreamer::getCurrentFragment() const {
+ assert(getCurrentSectionData() && "No current section!");
+
+ if (!getCurrentSectionData()->empty())
+ return &getCurrentSectionData()->getFragmentList().back();
+
+ return 0;
+}
+
+MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
+ MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!F)
+ F = new MCDataFragment(getCurrentSectionData());
+ return F;
+}
+
+const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
+ switch (Value->getKind()) {
+ case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!");
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols(BE->getLHS());
+ AddValueSymbols(BE->getRHS());
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+ break;
+ }
+
+ return Value;
+}
+
void MCObjectStreamer::SwitchSection(const MCSection *Section) {
assert(Section && "Cannot switch to a null section!");
// If already in this section, then this is a noop.
if (Section == CurSection) return;
+ PrevSection = CurSection;
CurSection = Section;
CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
}
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 465d98382877b..086df081a938f 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -117,6 +117,13 @@ AsmToken AsmLexer::LexLineComment() {
return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
}
+static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
+ if (CurPtr[0] == 'L' && CurPtr[1] == 'L')
+ CurPtr += 2;
+ if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L')
+ CurPtr += 3;
+}
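+
+// (Illustration, not part of the original change: after lexing "0x1234ULL" or
+// "42LL", CurPtr has been advanced past the suffix, so those letters never
+// reach the rest of the lexer; a bare "U" or "UL" suffix is not handled here.)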
+
/// LexDigit: First character is [0-9].
/// Local Label: [0-9][:]
@@ -133,7 +140,7 @@ AsmToken AsmLexer::LexDigit() {
++CurPtr;
StringRef Result(TokStart, CurPtr - TokStart);
-
+
long long Value;
if (Result.getAsInteger(10, Value)) {
// We have to handle minint_as_a_positive_value specially, because
@@ -143,6 +150,11 @@ AsmToken AsmLexer::LexDigit() {
else
return ReturnError(TokStart, "Invalid decimal number");
}
+
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
return AsmToken(AsmToken::Integer, Result, Value);
}
@@ -165,9 +177,13 @@ AsmToken AsmLexer::LexDigit() {
StringRef Result(TokStart, CurPtr - TokStart);
long long Value;
- if (Result.getAsInteger(2, Value))
+ if (Result.substr(2).getAsInteger(2, Value))
return ReturnError(TokStart, "Invalid binary number");
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
return AsmToken(AsmToken::Integer, Result, Value);
}
@@ -185,6 +201,10 @@ AsmToken AsmLexer::LexDigit() {
if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result))
return ReturnError(TokStart, "Invalid hexadecimal number");
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart),
(int64_t)Result);
}
@@ -198,6 +218,10 @@ AsmToken AsmLexer::LexDigit() {
if (Result.getAsInteger(8, Value))
return ReturnError(TokStart, "Invalid octal number");
+ // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
+ // suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
return AsmToken(AsmToken::Integer, Result, Value);
}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index e0949bd2856f7..f83cd5eb2a160 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -11,47 +11,237 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/MCParser/AsmParser.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCParser/AsmCond.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmParser.h"
+#include <vector>
using namespace llvm;
namespace {
+/// \brief Helper class for tracking macro definitions.
+struct Macro {
+ StringRef Name;
+ StringRef Body;
+
+public:
+ Macro(StringRef N, StringRef B) : Name(N), Body(B) {}
+};
+
+/// \brief Helper class for storing information about an active macro
+/// instantiation.
+struct MacroInstantiation {
+ /// The macro being instantiated.
+ const Macro *TheMacro;
+
+ /// The macro instantiation with substitutions.
+ MemoryBuffer *Instantiation;
+
+ /// The location of the instantiation.
+ SMLoc InstantiationLoc;
+
+ /// The location where parsing should resume upon instantiation completion.
+ SMLoc ExitLoc;
+
+public:
+ MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+ const std::vector<std::vector<AsmToken> > &A);
+};
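+
+// Illustrative flow (a sketch, not part of the original patch): given
+//
+//   .macro inc2
+//   addl $1, %eax
+//   addl $1, %eax
+//   .endm
+//   inc2
+//
+// ParseDirectiveMacro records a Macro{"inc2", body} in MacroMap; when
+// ParseStatement later sees the identifier "inc2" it calls HandleMacroEntry,
+// which pushes a MacroInstantiation whose Instantiation buffer is lexed in
+// place of the original stream, and HandleMacroExit resumes at ExitLoc.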
+
+/// \brief The concrete assembly parser instance.
+class AsmParser : public MCAsmParser {
+ friend class GenericAsmParser;
+
+ AsmParser(const AsmParser &); // DO NOT IMPLEMENT
+ void operator=(const AsmParser &); // DO NOT IMPLEMENT
+private:
+ AsmLexer Lexer;
+ MCContext &Ctx;
+ MCStreamer &Out;
+ SourceMgr &SrcMgr;
+ MCAsmParserExtension *GenericParser;
+ MCAsmParserExtension *PlatformParser;
+
+ /// This is the current buffer index we're lexing from as managed by the
+ /// SourceMgr object.
+ int CurBuffer;
+
+ AsmCond TheCondState;
+ std::vector<AsmCond> TheCondStack;
+
+  /// DirectiveMap - This is a table of handlers for directives. Each handler
+  /// is invoked after the directive identifier is read and is responsible for
+ /// parsing and validating the rest of the directive. The handler is passed
+ /// in the directive name and the location of the directive keyword.
+ StringMap<std::pair<MCAsmParserExtension*, DirectiveHandler> > DirectiveMap;
+
+ /// MacroMap - Map of currently defined macros.
+ StringMap<Macro*> MacroMap;
+
+ /// ActiveMacros - Stack of active macro instantiations.
+ std::vector<MacroInstantiation*> ActiveMacros;
+
+ /// Boolean tracking whether macro substitution is enabled.
+ unsigned MacrosEnabled : 1;
+
+public:
+ AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+ const MCAsmInfo &MAI);
+ ~AsmParser();
+
+ virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false);
+
+ void AddDirectiveHandler(MCAsmParserExtension *Object,
+ StringRef Directive,
+ DirectiveHandler Handler) {
+ DirectiveMap[Directive] = std::make_pair(Object, Handler);
+ }
+
+public:
+ /// @name MCAsmParser Interface
+ /// {
+
+ virtual SourceMgr &getSourceManager() { return SrcMgr; }
+ virtual MCAsmLexer &getLexer() { return Lexer; }
+ virtual MCContext &getContext() { return Ctx; }
+ virtual MCStreamer &getStreamer() { return Out; }
+
+  virtual void Warning(SMLoc L, const Twine &Msg);
+ virtual bool Error(SMLoc L, const Twine &Msg);
+
+ const AsmToken &Lex();
+
+ bool ParseExpression(const MCExpr *&Res);
+ virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
+ virtual bool ParseAbsoluteExpression(int64_t &Res);
+
+ /// }
+
+private:
+ bool ParseStatement();
+
+ bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
+ void HandleMacroExit();
+
+ void PrintMacroInstantiations();
+ void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const;
+
+ /// EnterIncludeFile - Enter the specified file. This returns true on failure.
+ bool EnterIncludeFile(const std::string &Filename);
+
+ /// \brief Reset the current lexer position to that given by \arg Loc. The
+ /// current token is not set; clients should ensure Lex() is called
+ /// subsequently.
+ void JumpToLoc(SMLoc Loc);
+
+ void EatToEndOfStatement();
+
+  /// \brief Parse up to the end of statement and return the contents from the
+ /// current token until the end of the statement; the current token on exit
+ /// will be either the EndOfStatement or EOF.
+ StringRef ParseStringToEndOfStatement();
+
+ bool ParseAssignment(StringRef Name);
+
+ bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
+ bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+
+ /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
+ /// and set \arg Res to the identifier contents.
+ bool ParseIdentifier(StringRef &Res);
+
+ // Directive Parsing.
+ bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz"
+ bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+ bool ParseDirectiveFill(); // ".fill"
+ bool ParseDirectiveSpace(); // ".space"
+ bool ParseDirectiveSet(); // ".set"
+ bool ParseDirectiveOrg(); // ".org"
+ // ".align{,32}", ".p2align{,w,l}"
+ bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
+
+ /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
+ /// accepts a single symbol (which should be a label or an external).
+ bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
+ bool ParseDirectiveELFType(); // ELF specific ".type"
+
+ bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
+
+ bool ParseDirectiveAbort(); // ".abort"
+ bool ParseDirectiveInclude(); // ".include"
+
+ bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+ bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
+ bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
+ bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
+
+ /// ParseEscapedString - Parse the current token as a string which may include
+ /// escaped characters and return the string contents.
+ bool ParseEscapedString(std::string &Data);
+};
+
/// \brief Generic implementations of directive handling, etc. which are
/// shared (or the default, at least) for all assembler parsers.
class GenericAsmParser : public MCAsmParserExtension {
+ template<bool (GenericAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<GenericAsmParser, Handler>);
+ }
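+
+  // (Sketch of the mechanism, assuming HandleDirective is the usual static
+  // trampoline: it casts the stored MCAsmParserExtension* back to
+  // GenericAsmParser and invokes the member function Handler on it.  This is
+  // what lets DirectiveMap hold a plain object/function-pointer pair instead
+  // of a member-function pointer; see the dispatch in ParseStatement.)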
+
public:
GenericAsmParser() {}
+ AsmParser &getParser() {
+ return (AsmParser&) this->MCAsmParserExtension::getParser();
+ }
+
virtual void Initialize(MCAsmParser &Parser) {
// Call the base implementation.
this->MCAsmParserExtension::Initialize(Parser);
// Debugging directives.
- Parser.AddDirectiveHandler(this, ".file", MCAsmParser::DirectiveHandler(
- &GenericAsmParser::ParseDirectiveFile));
- Parser.AddDirectiveHandler(this, ".line", MCAsmParser::DirectiveHandler(
- &GenericAsmParser::ParseDirectiveLine));
- Parser.AddDirectiveHandler(this, ".loc", MCAsmParser::DirectiveHandler(
- &GenericAsmParser::ParseDirectiveLoc));
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc");
+
+ // Macro directives.
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+ ".macros_on");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+ ".macros_off");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm");
+ AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro");
}
- bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file"
- bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line"
- bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc"
+ bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc);
+
+ bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
+ bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc);
};
}
@@ -69,7 +259,7 @@ AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
MCStreamer &_Out, const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
- TargetParser(0), CurBuffer(0) {
+ CurBuffer(0), MacrosEnabled(true) {
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
// Initialize the generic parser.
@@ -89,22 +279,33 @@ AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
}
AsmParser::~AsmParser() {
+ assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
+
+ // Destroy any macros.
+ for (StringMap<Macro*>::iterator it = MacroMap.begin(),
+ ie = MacroMap.end(); it != ie; ++it)
+ delete it->getValue();
+
delete PlatformParser;
delete GenericParser;
}
-void AsmParser::setTargetParser(TargetAsmParser &P) {
- assert(!TargetParser && "Target parser is already initialized!");
- TargetParser = &P;
- TargetParser->Initialize(*this);
+void AsmParser::PrintMacroInstantiations() {
+ // Print the active macro instantiation stack.
+ for (std::vector<MacroInstantiation*>::const_reverse_iterator
+ it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it)
+ PrintMessage((*it)->InstantiationLoc, "while in macro instantiation",
+ "note");
}
void AsmParser::Warning(SMLoc L, const Twine &Msg) {
PrintMessage(L, Msg.str(), "warning");
+ PrintMacroInstantiations();
}
bool AsmParser::Error(SMLoc L, const Twine &Msg) {
PrintMessage(L, Msg.str(), "error");
+ PrintMacroInstantiations();
return true;
}
@@ -124,7 +325,12 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) {
return false;
}
-
+
+void AsmParser::JumpToLoc(SMLoc Loc) {
+ CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
+}
+
const AsmToken &AsmParser::Lex() {
const AsmToken *tok = &Lexer.Lex();
@@ -133,15 +339,13 @@ const AsmToken &AsmParser::Lex() {
// include stack.
SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
if (ParentIncludeLoc != SMLoc()) {
- CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
- Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer),
- ParentIncludeLoc.getPointer());
+ JumpToLoc(ParentIncludeLoc);
tok = &Lexer.Lex();
}
}
if (tok->is(AsmToken::Error))
- PrintMessage(Lexer.getErrLoc(), Lexer.getErr(), "error");
+ Error(Lexer.getErrLoc(), Lexer.getErr());
return *tok;
}
@@ -174,6 +378,16 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
if (TheCondState.TheCond != StartingCondState.TheCond ||
TheCondState.Ignore != StartingCondState.Ignore)
return TokError("unmatched .ifs or .elses");
+
+ // Check to see there are no empty DwarfFile slots.
+ const std::vector<MCDwarfFile *> &MCDwarfFiles =
+ getContext().getMCDwarfFiles();
+ for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+    if (!MCDwarfFiles[i]) {
+ TokError("unassigned file number: " + Twine(i) + " for .file directives");
+ HadError = true;
+ }
+ }
// Finalize the output stream if there are no errors and if the client wants
// us to.
@@ -194,6 +408,16 @@ void AsmParser::EatToEndOfStatement() {
Lex();
}
+StringRef AsmParser::ParseStringToEndOfStatement() {
+ const char *Start = getTok().getLoc().getPointer();
+
+ while (Lexer.isNot(AsmToken::EndOfStatement) &&
+ Lexer.isNot(AsmToken::Eof))
+ Lex();
+
+ const char *End = getTok().getLoc().getPointer();
+ return StringRef(Start, End - Start);
+}
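Since every token lexes out of a single contiguous MemoryBuffer, the helper can recover the raw text between two locations with pointer arithmetic alone. A minimal standalone sketch of the same slicing idea, assuming a newline marks the end of statement (the function and buffer names here are illustrative, not part of the patch):

    #include <iostream>
    #include <string>

    // Illustrative: return the raw text from Cursor up to (not including)
    // the end of the current statement, the way ParseStringToEndOfStatement
    // slices between two SMLoc pointers into the same MemoryBuffer.
    static std::string sliceToEndOfStatement(const char *Cursor,
                                             const char *BufferEnd) {
      const char *Start = Cursor;
      while (Cursor != BufferEnd && *Cursor != '\n') // stand-in for the
        ++Cursor;                                    // EndOfStatement token
      return std::string(Start, Cursor - Start);
    }

    int main() {
      const char Buf[] = ".abort unsupported target\n.text\n";
      std::cout << sliceToEndOfStatement(Buf + 7, Buf + sizeof(Buf) - 1)
                << "\n"; // prints: unsupported target
    }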
/// ParseParenExpr - Parse a paren expression and return it.
/// NOTE: This assumes the leading '(' has already been consumed.
@@ -225,10 +449,17 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return true;
Res = MCUnaryExpr::CreateLNot(Res, getContext());
return false;
+ case AsmToken::Dollar:
case AsmToken::String:
case AsmToken::Identifier: {
+ EndLoc = Lexer.getLoc();
+
+ StringRef Identifier;
+ if (ParseIdentifier(Identifier))
+ return true;
+
// This is a symbol reference.
- std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@');
+ std::pair<StringRef, StringRef> Split = Identifier.split('@');
MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
// Mark the symbol as used in an expression.
@@ -236,12 +467,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// Lookup the symbol variant if used.
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
- if (Split.first.size() != getTok().getIdentifier().size())
+ if (Split.first.size() != Identifier.size())
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
- EndLoc = Lexer.getLoc();
- Lex(); // Eat identifier.
-
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
@@ -568,7 +796,12 @@ bool AsmParser::ParseStatement() {
default: // Normal instruction or directive.
break;
}
-
+
+ // If macros are enabled, check to see if this is a macro instantiation.
+ if (MacrosEnabled)
+ if (const Macro *M = MacroMap.lookup(IDVal))
+ return HandleMacroEntry(IDVal, IDLoc, M);
+
// Otherwise, we have a normal instruction or directive.
if (IDVal[0] == '.') {
// Assembler features
@@ -591,11 +824,14 @@ bool AsmParser::ParseStatement() {
if (IDVal == ".quad")
return ParseDirectiveValue(8);
- // FIXME: Target hooks for IsPow2.
- if (IDVal == ".align")
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1);
- if (IDVal == ".align32")
- return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4);
+ if (IDVal == ".align") {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1);
+ }
+ if (IDVal == ".align32") {
+ bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes();
+ return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4);
+ }
if (IDVal == ".balign")
return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1);
if (IDVal == ".balignw")
@@ -662,7 +898,7 @@ bool AsmParser::ParseStatement() {
std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
DirectiveMap.lookup(IDVal);
if (Handler.first)
- return (Handler.first->*Handler.second)(IDVal, IDLoc);
+ return (*Handler.second)(Handler.first, IDVal, IDLoc);
// Target hook for parsing target specific directives.
if (!getTargetParser().ParseDirective(ID))
@@ -684,20 +920,29 @@ bool AsmParser::ParseStatement() {
if (!HadError && Lexer.isNot(AsmToken::EndOfStatement))
HadError = TokError("unexpected token in argument list");
+ // Dump the parsed representation, if requested.
+ if (getShowParsedOperands()) {
+ SmallString<256> Str;
+ raw_svector_ostream OS(Str);
+ OS << "parsed instruction: [";
+ for (unsigned i = 0; i != ParsedOperands.size(); ++i) {
+ if (i != 0)
+ OS << ", ";
+ ParsedOperands[i]->dump(OS);
+ }
+ OS << "]";
+
+ PrintMessage(IDLoc, OS.str(), "note");
+ }
+
// If parsing succeeded, match the instruction.
if (!HadError) {
MCInst Inst;
- if (!getTargetParser().MatchInstruction(ParsedOperands, Inst)) {
+ if (!getTargetParser().MatchInstruction(IDLoc, ParsedOperands, Inst)) {
// Emit the instruction on success.
Out.EmitInstruction(Inst);
- } else {
- // Otherwise emit a diagnostic about the match failure and set the error
- // flag.
- //
- // FIXME: We should give nicer diagnostics about the exact failure.
- Error(IDLoc, "unrecognized instruction");
+ } else
HadError = true;
- }
}
// If there was no error, consume the end-of-statement token. Otherwise this
@@ -712,6 +957,132 @@ bool AsmParser::ParseStatement() {
return HadError;
}
+MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+ const std::vector<std::vector<AsmToken> > &A)
+ : TheMacro(M), InstantiationLoc(IL), ExitLoc(EL)
+{
+ // Macro instantiation is lexical, unfortunately. We construct a new buffer
+ // to hold the macro body with substitutions.
+ SmallString<256> Buf;
+ raw_svector_ostream OS(Buf);
+
+ StringRef Body = M->Body;
+ while (!Body.empty()) {
+ // Scan for the next substitution.
+ std::size_t End = Body.size(), Pos = 0;
+ for (; Pos != End; ++Pos) {
+ // Check for a substitution or escape.
+ if (Body[Pos] != '$' || Pos + 1 == End)
+ continue;
+
+ char Next = Body[Pos + 1];
+ if (Next == '$' || Next == 'n' || isdigit(Next))
+ break;
+ }
+
+ // Add the prefix.
+ OS << Body.slice(0, Pos);
+
+ // Check if we reached the end.
+ if (Pos == End)
+ break;
+
+ switch (Body[Pos+1]) {
+ // $$ => $
+ case '$':
+ OS << '$';
+ break;
+
+ // $n => number of arguments
+ case 'n':
+ OS << A.size();
+ break;
+
+ // $[0-9] => argument
+ default: {
+ // Missing arguments are ignored.
+ unsigned Index = Body[Pos+1] - '0';
+ if (Index >= A.size())
+ break;
+
+ // Otherwise substitute with the token values, with spaces eliminated.
+ for (std::vector<AsmToken>::const_iterator it = A[Index].begin(),
+ ie = A[Index].end(); it != ie; ++it)
+ OS << it->getString();
+ break;
+ }
+ }
+
+ // Update the scan point.
+ Body = Body.substr(Pos + 2);
+ }
+
+ // We include the .endmacro in the buffer as our cue to exit the macro
+ // instantiation.
+ OS << ".endmacro\n";
+
+ Instantiation = MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
+}
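The substitution rules above ($$ yields a literal '$', $n yields the argument count, $0 through $9 yield argument text, with out-of-range arguments silently dropped) can be exercised in isolation. A sketch of the same expansion over plain strings, assuming arguments arrive pre-joined rather than as token vectors:

    #include <cctype>
    #include <iostream>
    #include <string>
    #include <vector>

    // Illustrative re-statement of the body-expansion rules.
    static std::string expandMacroBody(const std::string &Body,
                                       const std::vector<std::string> &Args) {
      std::string Out;
      for (size_t Pos = 0; Pos != Body.size(); ++Pos) {
        char C = Body[Pos];
        if (C != '$' || Pos + 1 == Body.size()) { Out += C; continue; }
        char Next = Body[++Pos];
        if (Next == '$')                          // $$ => $
          Out += '$';
        else if (Next == 'n')                     // $n => argument count
          Out += std::to_string(Args.size());
        else if (isdigit((unsigned char)Next)) {  // $[0-9] => argument
          size_t Index = Next - '0';
          if (Index < Args.size())                // missing args are ignored
            Out += Args[Index];
        } else {                                  // not a substitution
          Out += C;
          Out += Next;
        }
      }
      return Out;
    }

    int main() {
      std::vector<std::string> Args;
      Args.push_back("r0"); Args.push_back("r1");
      std::cout << expandMacroBody("add $0, $1  ; $n args, cost $$1\n", Args);
      // prints: add r0, r1  ; 2 args, cost $1
    }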
+
+bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
+ const Macro *M) {
+ // Arbitrarily limit macro nesting depth to match 'as'. We can eliminate
+ // this, although we should protect against infinite loops.
+ if (ActiveMacros.size() == 20)
+ return TokError("macros cannot be nested more than 20 levels deep");
+
+ // Parse the macro instantiation arguments.
+ std::vector<std::vector<AsmToken> > MacroArguments;
+ MacroArguments.push_back(std::vector<AsmToken>());
+ unsigned ParenLevel = 0;
+ for (;;) {
+ if (Lexer.is(AsmToken::Eof))
+ return TokError("unexpected token in macro instantiation");
+ if (Lexer.is(AsmToken::EndOfStatement))
+ break;
+
+ // If we aren't inside parentheses and this is a comma, start a new token
+ // list.
+ if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
+ MacroArguments.push_back(std::vector<AsmToken>());
+ } else {
+ // Adjust the current parentheses level.
+ if (Lexer.is(AsmToken::LParen))
+ ++ParenLevel;
+ else if (Lexer.is(AsmToken::RParen) && ParenLevel)
+ --ParenLevel;
+
+ // Append the token to the current argument list.
+ MacroArguments.back().push_back(getTok());
+ }
+ Lex();
+ }
+
+ // Create the macro instantiation object and add to the current macro
+ // instantiation stack.
+ MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
+ getTok().getLoc(),
+ MacroArguments);
+ ActiveMacros.push_back(MI);
+
+ // Jump to the macro instantiation and prime the lexer.
+ CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+ Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+ Lex();
+
+ return false;
+}
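Arguments are therefore split only on commas at parenthesis depth zero, so an instantiation like 'foo (1, 2), bar' produces two arguments rather than three. A sketch of just that splitting rule, operating on characters where the real loop walks lexer tokens:

    #include <iostream>
    #include <string>
    #include <vector>

    // Illustrative: split a macro argument list on commas that are not
    // nested inside parentheses, mirroring the ParenLevel tracking above.
    static std::vector<std::string> splitArguments(const std::string &Line) {
      std::vector<std::string> Args(1);
      unsigned ParenLevel = 0;
      for (size_t i = 0; i != Line.size(); ++i) {
        char C = Line[i];
        if (ParenLevel == 0 && C == ',') {
          Args.push_back(std::string());   // start a new argument
        } else {
          if (C == '(')
            ++ParenLevel;
          else if (C == ')' && ParenLevel)
            --ParenLevel;
          Args.back() += C;
        }
      }
      return Args;
    }

    int main() {
      std::vector<std::string> Args = splitArguments("foo (1, 2), bar");
      for (size_t i = 0; i != Args.size(); ++i)
        std::cout << i << ": '" << Args[i] << "'\n";
      // 0: 'foo (1, 2)'
      // 1: ' bar'
    }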
+
+void AsmParser::HandleMacroExit() {
+ // Jump to the EndOfStatement we should return to, and consume it.
+ JumpToLoc(ActiveMacros.back()->ExitLoc);
+ Lex();
+
+ // Pop the instantiation entry.
+ delete ActiveMacros.back();
+ ActiveMacros.pop_back();
+}
+
bool AsmParser::ParseAssignment(StringRef Name) {
// FIXME: Use better location, we should use proper tokens.
SMLoc EqualLoc = Lexer.getLoc();
@@ -760,6 +1131,30 @@ bool AsmParser::ParseAssignment(StringRef Name) {
/// ::= identifier
/// ::= string
bool AsmParser::ParseIdentifier(StringRef &Res) {
+ // The assembler has relaxed rules for accepting identifiers; in particular,
+ // we allow things like '.globl $foo', which would normally be separate
+ // tokens. At this level we have already lexed, so we cannot (currently)
+ // handle this as a context-dependent token; instead we detect adjacent
+ // tokens and return the combined identifier.
+ if (Lexer.is(AsmToken::Dollar)) {
+ SMLoc DollarLoc = getLexer().getLoc();
+
+ // Consume the dollar sign, and check for a following identifier.
+ Lex();
+ if (Lexer.isNot(AsmToken::Identifier))
+ return true;
+
+ // We have a '$' followed by an identifier, make sure they are adjacent.
+ if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+ return true;
+
+ // Construct the joined identifier and consume the token.
+ Res = StringRef(DollarLoc.getPointer(),
+ getTok().getIdentifier().size() + 1);
+ Lex();
+ return false;
+ }
+
if (Lexer.isNot(AsmToken::Identifier) &&
Lexer.isNot(AsmToken::String))
return true;
@@ -1081,13 +1476,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
bool UseCodeAlign = false;
if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
getStreamer().getCurrentSection()))
- UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+ UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
ValueSize == 1 && UseCodeAlign) {
getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
} else {
// FIXME: Target specific behavior about how the "extra" bytes are filled.
- getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
+ getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize,
+ MaxBytesToFill);
}
return false;
@@ -1238,31 +1634,22 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
}
/// ParseDirectiveAbort
-/// ::= .abort [ "abort_string" ]
+/// ::= .abort [... message ...]
bool AsmParser::ParseDirectiveAbort() {
// FIXME: Use loc from directive.
SMLoc Loc = getLexer().getLoc();
- StringRef Str = "";
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (getLexer().isNot(AsmToken::String))
- return TokError("expected string in '.abort' directive");
-
- Str = getTok().getString();
-
- Lex();
- }
-
+ StringRef Str = ParseStringToEndOfStatement();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.abort' directive");
-
+
Lex();
- // FIXME: Handle here.
if (Str.empty())
Error(Loc, ".abort detected. Assembly stopping.");
else
Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+ // FIXME: Actually abort assembly here.
return false;
}
@@ -1286,9 +1673,7 @@ bool AsmParser::ParseDirectiveInclude() {
// Attempt to switch the lexer to the included file before consuming the end
// of statement to avoid losing it when we switch.
if (EnterIncludeFile(Filename)) {
- PrintMessage(IncludeLoc,
- "Could not find include file '" + Filename + "'",
- "error");
+ Error(IncludeLoc, "Could not find include file '" + Filename + "'");
return true;
}
@@ -1401,6 +1786,7 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
// FIXME: I'm not sure what this is.
int64_t FileNumber = -1;
+ SMLoc FileNumberLoc = getLexer().getLoc();
if (getLexer().is(AsmToken::Integer)) {
FileNumber = getTok().getIntVal();
Lex();
@@ -1421,8 +1807,11 @@ bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
if (FileNumber == -1)
getStreamer().EmitFileDirective(Filename);
- else
+ else {
+ if (getContext().GetDwarfFile(Filename, FileNumber) == 0)
+ Error(FileNumberLoc, "file number already allocated");
getStreamer().EmitDwarfFileDirective(FileNumber, Filename);
+ }
return false;
}
@@ -1449,40 +1838,193 @@ bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
/// ParseDirectiveLoc
-/// ::= .loc number [number [number]]
+/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end]
+/// [epilogue_begin] [is_stmt VALUE] [isa VALUE]
+/// The first number is the file number, which must have been previously
+/// assigned with a .file directive; the second number is the line number;
+/// and the optional third number is the column position (zero if not
+/// specified). The remaining optional items are .loc sub-directives.
bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.loc' directive");
-
- // FIXME: What are these fields?
int64_t FileNumber = getTok().getIntVal();
- (void) FileNumber;
- // FIXME: Validate file.
-
+ if (FileNumber < 1)
+ return TokError("file number less than one in '.loc' directive");
+ if (!getContext().ValidateDwarfFileNumber(FileNumber))
+ return TokError("unassigned file number in '.loc' directive");
Lex();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (getLexer().isNot(AsmToken::Integer))
- return TokError("unexpected token in '.loc' directive");
- int64_t Param2 = getTok().getIntVal();
- (void) Param2;
+ int64_t LineNumber = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ LineNumber = getTok().getIntVal();
+ if (LineNumber < 1)
+ return TokError("line number less than one in '.loc' directive");
Lex();
+ }
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (getLexer().isNot(AsmToken::Integer))
+ int64_t ColumnPos = 0;
+ if (getLexer().is(AsmToken::Integer)) {
+ ColumnPos = getTok().getIntVal();
+ if (ColumnPos < 0)
+ return TokError("column position less than zero in '.loc' directive");
+ Lex();
+ }
+
+ unsigned Flags = 0;
+ unsigned Isa = 0;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+ if (getParser().ParseIdentifier(Name))
return TokError("unexpected token in '.loc' directive");
- int64_t Param3 = getTok().getIntVal();
- (void) Param3;
- Lex();
+ if (Name == "basic_block")
+ Flags |= DWARF2_FLAG_BASIC_BLOCK;
+ else if (Name == "prologue_end")
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ else if (Name == "epilogue_begin")
+ Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+ else if (Name == "is_stmt") {
+ SMLoc Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+ // The expression must be the constant 0 or 1.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value == 0)
+ Flags &= ~DWARF2_FLAG_IS_STMT;
+ else if (Value == 1)
+ Flags |= DWARF2_FLAG_IS_STMT;
+ else
+ return Error(Loc, "is_stmt value not 0 or 1");
+ }
+ else {
+ return Error(Loc, "is_stmt value not the constant value of 0 or 1");
+ }
+ }
+ else if (Name == "isa") {
+ SMLoc Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+ // The expression must be a constant greater or equal to 0.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value < 0)
+ return Error(Loc, "isa number less than zero");
+ Isa = Value;
+ }
+ else {
+ return Error(Loc, "isa number not a constant value");
+ }
+ }
+ else {
+ return Error(Loc, "unknown sub-directive in '.loc' directive");
+ }
- // FIXME: Do something with the .loc.
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
}
}
+ getContext().setCurrentDwarfLoc(FileNumber, LineNumber, ColumnPos, Flags,
+                                 Isa);
+
+ return false;
+}
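For example, '.loc 1 7 2 prologue_end is_stmt 1' records file 1, line 7, column 2 and ORs two flag bits into the single word handed to setCurrentDwarfLoc. A self-contained sketch of that accumulation; the DWARF2_FLAG_* encodings below are assumptions for illustration, not values taken from MCDwarf.h:

    #include <cassert>

    // Assumed, illustrative encodings of the DWARF2_FLAG_* bits.
    enum {
      DWARF2_FLAG_IS_STMT        = 1 << 0,
      DWARF2_FLAG_BASIC_BLOCK    = 1 << 1,
      DWARF2_FLAG_PROLOGUE_END   = 1 << 2,
      DWARF2_FLAG_EPILOGUE_BEGIN = 1 << 3
    };

    int main() {
      unsigned Flags = 0;
      Flags |= DWARF2_FLAG_PROLOGUE_END; // sub-directive "prologue_end"
      Flags |= DWARF2_FLAG_IS_STMT;      // sub-directive "is_stmt 1"
      assert(Flags == 0x5);
      // The parser would then make the single call:
      //   setCurrentDwarfLoc(/*File=*/1, /*Line=*/7, /*Col=*/2, Flags,
      //                      /*Isa=*/0);
    }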
+
+/// ParseDirectiveMacrosOnOff
+/// ::= .macros_on
+/// ::= .macros_off
+bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive,
+ SMLoc DirectiveLoc) {
if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.file' directive");
+ return Error(getLexer().getLoc(),
+ "unexpected token in '" + Directive + "' directive");
+
+ getParser().MacrosEnabled = Directive == ".macros_on";
return false;
}
+/// ParseDirectiveMacro
+/// ::= .macro name
+bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.macro' directive");
+
+ // Eat the end of statement.
+ Lex();
+
+ AsmToken EndToken, StartToken = getTok();
+
+ // Lex the macro definition.
+ for (;;) {
+ // Check whether we have reached the end of the file.
+ if (getLexer().is(AsmToken::Eof))
+ return Error(DirectiveLoc, "no matching '.endmacro' in definition");
+
+ // Otherwise, check whether we have reached the .endmacro.
+ if (getLexer().is(AsmToken::Identifier) &&
+ (getTok().getIdentifier() == ".endm" ||
+ getTok().getIdentifier() == ".endmacro")) {
+ EndToken = getTok();
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + EndToken.getIdentifier() +
+ "' directive");
+ break;
+ }
+
+ // Otherwise, scan until the end of the statement.
+ getParser().EatToEndOfStatement();
+ }
+
+ if (getParser().MacroMap.lookup(Name)) {
+ return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
+ }
+
+ const char *BodyStart = StartToken.getLoc().getPointer();
+ const char *BodyEnd = EndToken.getLoc().getPointer();
+ StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
+ getParser().MacroMap[Name] = new Macro(Name, Body);
+ return false;
+}
+
+/// ParseDirectiveEndMacro
+/// ::= .endm
+/// ::= .endmacro
+bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
+ SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + Directive + "' directive");
+
+ // If we are inside a macro instantiation, terminate the current
+ // instantiation.
+ if (!getParser().ActiveMacros.empty()) {
+ getParser().HandleMacroExit();
+ return false;
+ }
+
+ // Otherwise, this .endmacro is a stray entry in the file; well-formed
+ // .endmacro directives are handled during the macro definition parsing.
+ return TokError("unexpected '" + Directive + "' in file, "
+ "no current macro definition");
+}
+
+/// \brief Create an MCAsmParser instance.
+MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM,
+ MCContext &C, MCStreamer &Out,
+ const MCAsmInfo &MAI) {
+ return new AsmParser(T, SM, C, Out, MAI);
+}
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 7d8639ea4d81f..54ddb449b2859 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -25,6 +25,12 @@ namespace {
/// \brief Implementation of directive handling which is shared across all
/// Darwin targets.
class DarwinAsmParser : public MCAsmParserExtension {
+ template<bool (DarwinAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<DarwinAsmParser, Handler>);
+ }
+
bool ParseSectionSwitch(const char *Segment, const char *Section,
unsigned TAA = 0, unsigned ImplicitAlign = 0,
unsigned StubSize = 0);
@@ -36,168 +42,70 @@ public:
// Call the base implementation.
this->MCAsmParserExtension::Initialize(Parser);
- Parser.AddDirectiveHandler(this, ".desc", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveDesc));
- Parser.AddDirectiveHandler(this, ".lsym", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveLsym));
- Parser.AddDirectiveHandler(this, ".subsections_via_symbols",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols));
- Parser.AddDirectiveHandler(this, ".dump", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveDumpOrLoad));
- Parser.AddDirectiveHandler(this, ".load", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveDumpOrLoad));
- Parser.AddDirectiveHandler(this, ".section", MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveSection));
- Parser.AddDirectiveHandler(this, ".secure_log_unique",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveSecureLogUnique));
- Parser.AddDirectiveHandler(this, ".secure_log_reset",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveSecureLogReset));
- Parser.AddDirectiveHandler(this, ".tbss",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveTBSS));
- Parser.AddDirectiveHandler(this, ".zerofill",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseDirectiveZerofill));
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>(
+ ".subsections_via_symbols");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>(
+ ".secure_log_unique");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>(
+ ".secure_log_reset");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss");
+ AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill");
// Special section directives.
- Parser.AddDirectiveHandler(this, ".const",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveConst));
- Parser.AddDirectiveHandler(this, ".const_data",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveConstData));
- Parser.AddDirectiveHandler(this, ".constructor",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveConstructor));
- Parser.AddDirectiveHandler(this, ".cstring",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveCString));
- Parser.AddDirectiveHandler(this, ".data",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveData));
- Parser.AddDirectiveHandler(this, ".destructor",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveDestructor));
- Parser.AddDirectiveHandler(this, ".dyld",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveDyld));
- Parser.AddDirectiveHandler(this, ".fvmlib_init0",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveFVMLibInit0));
- Parser.AddDirectiveHandler(this, ".fvmlib_init1",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveFVMLibInit1));
- Parser.AddDirectiveHandler(this, ".lazy_symbol_pointer",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers));
- Parser.AddDirectiveHandler(this, ".literal16",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveLiteral16));
- Parser.AddDirectiveHandler(this, ".literal4",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveLiteral4));
- Parser.AddDirectiveHandler(this, ".literal8",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveLiteral8));
- Parser.AddDirectiveHandler(this, ".mod_init_func",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveModInitFunc));
- Parser.AddDirectiveHandler(this, ".mod_term_func",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveModTermFunc));
- Parser.AddDirectiveHandler(this, ".non_lazy_symbol_pointer",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers));
- Parser.AddDirectiveHandler(this, ".objc_cat_cls_meth",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth));
- Parser.AddDirectiveHandler(this, ".objc_cat_inst_meth",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth));
- Parser.AddDirectiveHandler(this, ".objc_category",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCCategory));
- Parser.AddDirectiveHandler(this, ".objc_class",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClass));
- Parser.AddDirectiveHandler(this, ".objc_class_names",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClassNames));
- Parser.AddDirectiveHandler(this, ".objc_class_vars",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClassVars));
- Parser.AddDirectiveHandler(this, ".objc_cls_meth",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClsMeth));
- Parser.AddDirectiveHandler(this, ".objc_cls_refs",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCClsRefs));
- Parser.AddDirectiveHandler(this, ".objc_inst_meth",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCInstMeth));
- Parser.AddDirectiveHandler(this, ".objc_instance_vars",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars));
- Parser.AddDirectiveHandler(this, ".objc_message_refs",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs));
- Parser.AddDirectiveHandler(this, ".objc_meta_class",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCMetaClass));
- Parser.AddDirectiveHandler(this, ".objc_meth_var_names",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames));
- Parser.AddDirectiveHandler(this, ".objc_meth_var_types",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes));
- Parser.AddDirectiveHandler(this, ".objc_module_info",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo));
- Parser.AddDirectiveHandler(this, ".objc_protocol",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCProtocol));
- Parser.AddDirectiveHandler(this, ".objc_selector_strs",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs));
- Parser.AddDirectiveHandler(this, ".objc_string_object",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCStringObject));
- Parser.AddDirectiveHandler(this, ".objc_symbols",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveObjCSymbols));
- Parser.AddDirectiveHandler(this, ".picsymbol_stub",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectivePICSymbolStub));
- Parser.AddDirectiveHandler(this, ".static_const",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveStaticConst));
- Parser.AddDirectiveHandler(this, ".static_data",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveStaticData));
- Parser.AddDirectiveHandler(this, ".symbol_stub",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveSymbolStub));
- Parser.AddDirectiveHandler(this, ".tdata",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveTData));
- Parser.AddDirectiveHandler(this, ".text",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveText));
- Parser.AddDirectiveHandler(this, ".thread_init_func",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveThreadInitFunc));
- Parser.AddDirectiveHandler(this, ".tlv",
- MCAsmParser::DirectiveHandler(
- &DarwinAsmParser::ParseSectionDirectiveTLV));
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(".constructor");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(".cstring");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(".destructor");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(".fvmlib_init0");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(".fvmlib_init1");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(".lazy_symbol_pointer");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(".literal16");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(".literal4");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(".literal8");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(".mod_init_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(".mod_term_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(".non_lazy_symbol_pointer");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(".objc_cat_cls_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(".objc_cat_inst_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(".objc_category");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(".objc_class");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(".objc_class_names");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(".objc_class_vars");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(".objc_cls_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(".objc_cls_refs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(".objc_inst_meth");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(".objc_instance_vars");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(".objc_message_refs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(".objc_meta_class");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(".objc_meth_var_names");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(".objc_meth_var_types");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(".objc_module_info");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(".objc_protocol");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(".objc_selector_strs");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(".objc_string_object");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(".objc_symbols");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(".picsymbol_stub");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(".static_const");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(".static_data");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(".symbol_stub");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func");
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv");
}
bool ParseDirectiveDesc(StringRef, SMLoc);
bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
bool ParseDirectiveLsym(StringRef, SMLoc);
- bool ParseDirectiveSection();
+ bool ParseDirectiveSection(StringRef, SMLoc);
bool ParseDirectiveSecureLogReset(StringRef, SMLoc);
bool ParseDirectiveSecureLogUnique(StringRef, SMLoc);
bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc);
@@ -493,7 +401,7 @@ bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
/// ParseDirectiveSection:
/// ::= .section identifier (',' identifier)*
-bool DarwinAsmParser::ParseDirectiveSection() {
+bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
SMLoc Loc = getLexer().getLoc();
StringRef SectionName;
@@ -537,28 +445,22 @@ bool DarwinAsmParser::ParseDirectiveSection() {
}
/// ParseDirectiveSecureLogUnique
-/// ::= .secure_log_unique "log message"
+/// ::= .secure_log_unique ... message ...
bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
- std::string LogMessage;
-
- if (getLexer().isNot(AsmToken::String))
- LogMessage = "";
- else{
- LogMessage = getTok().getString();
- Lex();
- }
-
+ StringRef LogMessage = getParser().ParseStringToEndOfStatement();
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.secure_log_unique' directive");
if (getContext().getSecureLogUsed() != false)
return Error(IDLoc, ".secure_log_unique specified multiple times");
- char *SecureLogFile = getContext().getSecureLogFile();
+ // Get the secure log path.
+ const char *SecureLogFile = getContext().getSecureLogFile();
if (SecureLogFile == NULL)
return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE "
"environment variable unset.");
+ // Open the secure log file if we haven't already.
raw_ostream *OS = getContext().getSecureLog();
if (OS == NULL) {
std::string Err;
@@ -571,6 +473,7 @@ bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
getContext().setSecureLog(OS);
}
+ // Write the message.
int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc);
*OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
<< ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":"
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index 7a54dd39aa479..f982fdaecb123 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -8,15 +8,24 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCSectionELF.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/Twine.h"
using namespace llvm;
namespace {
class ELFAsmParser : public MCAsmParserExtension {
+ template<bool (ELFAsmParser::*Handler)(StringRef, SMLoc)>
+ void AddDirectiveHandler(StringRef Directive) {
+ getParser().AddDirectiveHandler(this, Directive,
+ HandleDirective<ELFAsmParser, Handler>);
+ }
+
bool ParseSectionSwitch(StringRef Section, unsigned Type,
unsigned Flags, SectionKind Kind);
@@ -27,10 +36,21 @@ public:
// Call the base implementation.
this->MCAsmParserExtension::Initialize(Parser);
- Parser.AddDirectiveHandler(this, ".data", MCAsmParser::DirectiveHandler(
- &ELFAsmParser::ParseSectionDirectiveData));
- Parser.AddDirectiveHandler(this, ".text", MCAsmParser::DirectiveHandler(
- &ELFAsmParser::ParseSectionDirectiveText));
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
+ AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".sleb128");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectiveLEB128>(".uleb128");
+ AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
}
bool ParseSectionDirectiveData(StringRef, SMLoc) {
@@ -43,6 +63,56 @@ public:
MCSectionELF::SHF_EXECINSTR |
MCSectionELF::SHF_ALLOC, SectionKind::getText());
}
+ bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".bss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE |
+ MCSectionELF::SHF_ALLOC, SectionKind::getBSS());
+ }
+ bool ParseSectionDirectiveRoData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".rodata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ }
+ bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE,
+ SectionKind::getThreadData());
+ }
+ bool ParseSectionDirectiveTBSS(StringRef, SMLoc) {
+ return ParseSectionSwitch(".tbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_TLS | MCSectionELF::SHF_WRITE,
+ SectionKind::getThreadBSS());
+ }
+ bool ParseSectionDirectiveDataRel(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRel());
+ }
+ bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getReadOnlyWithRelLocal());
+ }
+ bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) {
+ return ParseSectionSwitch(".eh_frame", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_ALLOC |
+ MCSectionELF::SHF_WRITE,
+ SectionKind::getDataRel());
+ }
+ bool ParseDirectiveLEB128(StringRef, SMLoc);
+ bool ParseDirectiveSection(StringRef, SMLoc);
+ bool ParseDirectiveSize(StringRef, SMLoc);
+ bool ParseDirectivePrevious(StringRef, SMLoc);
};
}
@@ -59,6 +129,159 @@ bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
return false;
}
+bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().ParseExpression(Expr))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ getStreamer().EmitELFSize(Sym, Expr);
+ return false;
+}
+
+// FIXME: This is a work in progress.
+bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+ StringRef SectionName;
+ // FIXME: This doesn't parse section names like ".note.GNU-stack" correctly.
+ if (getParser().ParseIdentifier(SectionName))
+ return TokError("expected identifier in directive");
+
+ std::string FlagsStr;
+ StringRef TypeName;
+ int64_t Size = 0;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in directive");
+
+ FlagsStr = getTok().getStringContents();
+ Lex();
+
+ AsmToken::TokenKind TypeStartToken;
+ if (getContext().getAsmInfo().getCommentString()[0] == '@')
+ TypeStartToken = AsmToken::Percent;
+ else
+ TypeStartToken = AsmToken::At;
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ if (getLexer().is(TypeStartToken)) {
+ Lex();
+ if (getParser().ParseIdentifier(TypeName))
+ return TokError("expected identifier in directive");
+
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ if (Size <= 0)
+ return TokError("section size must be positive");
+ }
+ }
+ }
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ unsigned Flags = 0;
+ for (unsigned i = 0; i < FlagsStr.size(); i++) {
+ switch (FlagsStr[i]) {
+ case 'a':
+ Flags |= MCSectionELF::SHF_ALLOC;
+ break;
+ case 'x':
+ Flags |= MCSectionELF::SHF_EXECINSTR;
+ break;
+ case 'w':
+ Flags |= MCSectionELF::SHF_WRITE;
+ break;
+ case 'M':
+ Flags |= MCSectionELF::SHF_MERGE;
+ break;
+ case 'S':
+ Flags |= MCSectionELF::SHF_STRINGS;
+ break;
+ case 'T':
+ Flags |= MCSectionELF::SHF_TLS;
+ break;
+ case 'c':
+ Flags |= MCSectionELF::XCORE_SHF_CP_SECTION;
+ break;
+ case 'd':
+ Flags |= MCSectionELF::XCORE_SHF_DP_SECTION;
+ break;
+ default:
+ return TokError("unknown flag");
+ }
+ }
+
+ unsigned Type = MCSectionELF::SHT_NULL;
+ if (!TypeName.empty()) {
+ if (TypeName == "init_array")
+ Type = MCSectionELF::SHT_INIT_ARRAY;
+ else if (TypeName == "fini_array")
+ Type = MCSectionELF::SHT_FINI_ARRAY;
+ else if (TypeName == "preinit_array")
+ Type = MCSectionELF::SHT_PREINIT_ARRAY;
+ else if (TypeName == "nobits")
+ Type = MCSectionELF::SHT_NOBITS;
+ else if (TypeName == "progbits")
+ Type = MCSectionELF::SHT_PROGBITS;
+ else
+ return TokError("unknown section type");
+ }
+
+ SectionKind Kind = (Flags & MCSectionELF::SHF_EXECINSTR)
+ ? SectionKind::getText()
+ : SectionKind::getDataRel();
+ getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type,
+ Flags, Kind, false));
+ return false;
+}
+
+bool ELFAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) {
+ int64_t Value;
+ if (getParser().ParseAbsoluteExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in directive");
+
+ // FIXME: Add proper MC support.
+ if (getContext().getAsmInfo().hasLEB128()) {
+ if (DirName[1] == 's')
+ getStreamer().EmitRawText("\t.sleb128\t" + Twine(Value));
+ else
+ getStreamer().EmitRawText("\t.uleb128\t" + Twine(Value));
+ return false;
+ }
+ // FIXME: This shouldn't be an error!
+ return TokError("LEB128 not supported yet");
+}
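When the target assembler lacks native support the directive is currently rejected, but the encoding the directive ultimately stands for is the standard DWARF LEB128 scheme: seven payload bits per byte, high bit set on every byte except the last, sign-extended for the signed form. A hedged sketch of both encoders, for reference rather than as the patch's code path:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Unsigned LEB128: 7 value bits per byte, low bits first.
    static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (Value != 0);
      return Out;
    }

    // Signed LEB128: stop once the remaining bits are all sign copies.
    static std::vector<uint8_t> encodeSLEB128(int64_t Value) {
      std::vector<uint8_t> Out;
      bool More = true;
      while (More) {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7; // arithmetic shift preserves the sign
        if ((Value == 0 && !(Byte & 0x40)) ||
            (Value == -1 && (Byte & 0x40)))
          More = false;
        else
          Byte |= 0x80;
        Out.push_back(Byte);
      }
      return Out;
    }

    int main() {
      std::vector<uint8_t> U = encodeULEB128(624485); // classic DWARF example
      for (size_t i = 0; i != U.size(); ++i)
        printf("%02x ", U[i]); // e5 8e 26
      printf("\n");
      std::vector<uint8_t> S = encodeSLEB128(-129);
      for (size_t i = 0; i != S.size(); ++i)
        printf("%02x ", S[i]); // ff 7e
      printf("\n");
    }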
+
+bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
+ const MCSection *PreviousSection = getStreamer().getPreviousSection();
+ if (PreviousSection != NULL)
+ getStreamer().SwitchSection(PreviousSection);
+
+ return false;
+}
+
namespace llvm {
MCAsmParserExtension *createELFAsmParser() {
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index bee30641c7fcd..70295efc613ca 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -12,19 +12,26 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
-MCAsmParser::MCAsmParser() {
+MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
}
MCAsmParser::~MCAsmParser() {
}
+void MCAsmParser::setTargetParser(TargetAsmParser &P) {
+ assert(!TargetParser && "Target parser is already initialized!");
+ TargetParser = &P;
+ TargetParser->Initialize(*this);
+}
+
const AsmToken &MCAsmParser::getTok() {
return getLexer().getTok();
}
-bool MCAsmParser::TokError(const char *Msg) {
+bool MCAsmParser::TokError(const Twine &Msg) {
Error(getLexer().getLoc(), Msg);
return true;
}
@@ -34,8 +41,4 @@ bool MCAsmParser::ParseExpression(const MCExpr *&Res) {
return ParseExpression(Res, L);
}
-/// getStartLoc - Get the location of the first token of this operand.
-SMLoc MCParsedAsmOperand::getStartLoc() const { return SMLoc(); }
-SMLoc MCParsedAsmOperand::getEndLoc() const { return SMLoc(); }
-
diff --git a/lib/MC/MCParser/TargetAsmParser.cpp b/lib/MC/MCParser/TargetAsmParser.cpp
index 05760c96cc658..8d43c21f4bc96 100644
--- a/lib/MC/MCParser/TargetAsmParser.cpp
+++ b/lib/MC/MCParser/TargetAsmParser.cpp
@@ -11,7 +11,7 @@
using namespace llvm;
TargetAsmParser::TargetAsmParser(const Target &T)
- : TheTarget(T)
+ : TheTarget(T), AvailableFeatures(0)
{
}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 573f2a3530ee9..3e9d02ea5ae73 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -15,7 +15,8 @@
#include <cstdlib>
using namespace llvm;
-MCStreamer::MCStreamer(MCContext &_Context) : Context(_Context), CurSection(0) {
+MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx), CurSection(0),
+ PrevSection(0) {
}
MCStreamer::~MCStreamer() {
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 7ca09511bdebd..cffabfadb3165 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -769,7 +769,7 @@ public:
IsPCRel = 1;
FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) +
Target.getConstant());
- FixedValue += 1 << Log2Size;
+ FixedValue += 1ULL << Log2Size;
} else {
FixedValue = 0;
}
diff --git a/lib/MC/Makefile b/lib/MC/Makefile
index a661fa6f40804..bf8b7c0e78318 100644
--- a/lib/MC/Makefile
+++ b/lib/MC/Makefile
@@ -10,7 +10,7 @@
LEVEL = ../..
LIBRARYNAME = LLVMMC
BUILD_ARCHIVE := 1
-PARALLEL_DIRS := MCParser
+PARALLEL_DIRS := MCParser MCDisassembler
include $(LEVEL)/Makefile.common
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 6804766b28957..eeb2b9675f4b5 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -12,41 +12,552 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "WinCOFFObjectWriter"
+
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCSectionCOFF.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include "llvm/System/TimeValue.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+
+#include <cstdio>
+
using namespace llvm;
namespace {
+typedef llvm::SmallString<COFF::NameSize> name;
+
+enum AuxiliaryType {
+ ATFunctionDefinition,
+ ATbfAndefSymbol,
+ ATWeakExternal,
+ ATFile,
+ ATSectionDefinition
+};
+
+struct AuxSymbol {
+ AuxiliaryType AuxType;
+ COFF::Auxiliary Aux;
+};
+
+class COFFSymbol {
+public:
+ COFF::symbol Data;
+
+ typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols;
+
+ name Name;
+ size_t Index;
+ AuxiliarySymbols Aux;
+ COFFSymbol *Other;
+
+ MCSymbolData const *MCData;
+
+ COFFSymbol(llvm::StringRef name, size_t index);
+ size_t size() const;
+ void set_name_offset(uint32_t Offset);
+};
+
+// This class contains staging data for a COFF relocation entry.
+struct COFFRelocation {
+ COFF::relocation Data;
+ COFFSymbol *Symb;
+
+ COFFRelocation() : Symb(NULL) {}
+ static size_t size() { return COFF::RelocationSize; }
+};
+
+typedef std::vector<COFFRelocation> relocations;
+
+class COFFSection {
+public:
+ COFF::section Header;
+
+ std::string Name;
+ size_t Number;
+ MCSectionData const *MCData;
+ COFFSymbol *Symb;
+ relocations Relocations;
+
+ COFFSection(llvm::StringRef name, size_t Index);
+ static size_t size();
+};
+
+// This class holds the COFF string table.
+class StringTable {
+ typedef llvm::StringMap<size_t> map;
+ map Map;
+
+ void update_length();
+public:
+ std::vector<char> Data;
+
+ StringTable();
+ size_t size() const;
+ size_t insert(llvm::StringRef String);
+};
+
+class WinCOFFObjectWriter : public MCObjectWriter {
+public:
+
+ typedef std::vector<COFFSymbol*> symbols;
+ typedef std::vector<COFFSection*> sections;
+
+ typedef StringMap<COFFSymbol *> name_symbol_map;
+ typedef StringMap<COFFSection *> name_section_map;
+
+ typedef DenseMap<MCSymbolData const *, COFFSymbol *> symbol_map;
+ typedef DenseMap<MCSectionData const *, COFFSection *> section_map;
+
+ // Root level file contents.
+ bool Is64Bit;
+ COFF::header Header;
+ sections Sections;
+ symbols Symbols;
+ StringTable Strings;
+
+ // Maps used during object file creation.
+ section_map SectionMap;
+ symbol_map SymbolMap;
+
+ WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
+ ~WinCOFFObjectWriter();
+
+ COFFSymbol *createSymbol(llvm::StringRef Name);
+ COFFSection *createSection(llvm::StringRef Name);
+
+ void InitCOFFEntity(COFFSymbol &Symbol);
+ void InitCOFFEntity(COFFSection &Section);
+
+ template <typename object_t, typename list_t>
+ object_t *createCOFFEntity(llvm::StringRef Name, list_t &List);
+
+ void DefineSection(MCSectionData const &SectionData);
+ void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler);
+
+ bool ExportSection(COFFSection *S);
+ bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
+
+ // Entity writing methods.
+
+ void WriteFileHeader(const COFF::header &Header);
+ void WriteSymbol(const COFFSymbol *S);
+ void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
+ void WriteSectionHeader(const COFF::section &S);
+ void WriteRelocation(const COFF::relocation &R);
+
+ // MCObjectWriter interface implementation.
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm);
+
+ void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+}
+
+static inline void write_uint32_le(void *Data, uint32_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x000000FF) >> 0;
+ Ptr[1] = (Value & 0x0000FF00) >> 8;
+ Ptr[2] = (Value & 0x00FF0000) >> 16;
+ Ptr[3] = (Value & 0xFF000000) >> 24;
+}
+
+static inline void write_uint16_le(void *Data, uint16_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0x00FF) >> 0;
+ Ptr[1] = (Value & 0xFF00) >> 8;
+}
+
+static inline void write_uint8_le(void *Data, uint8_t const &Value) {
+ uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+ Ptr[0] = (Value & 0xFF) >> 0;
+}
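These helpers fix the byte order in the output regardless of host endianness: the least significant byte always lands first. A quick worked check of the 32-bit case:

    #include <cassert>
    #include <cstdint>

    int main() {
      // write_uint32_le(Buf, 0xAABBCCDD) stores the low byte first:
      uint8_t Buf[4];
      uint32_t Value = 0xAABBCCDD;
      Buf[0] = (Value & 0x000000FF) >> 0;   // 0xDD
      Buf[1] = (Value & 0x0000FF00) >> 8;   // 0xCC
      Buf[2] = (Value & 0x00FF0000) >> 16;  // 0xBB
      Buf[3] = (Value & 0xFF000000) >> 24;  // 0xAA
      assert(Buf[0] == 0xDD && Buf[3] == 0xAA);
    }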
- class WinCOFFObjectWriter : public MCObjectWriter {
- public:
- WinCOFFObjectWriter(raw_ostream &OS);
+//------------------------------------------------------------------------------
+// Symbol class implementation
+
+COFFSymbol::COFFSymbol(llvm::StringRef name, size_t index)
+ : Name(name.begin(), name.end()), Index(-1)
+ , Other(NULL), MCData(NULL) {
+ memset(&Data, 0, sizeof(Data));
+}
+
+size_t COFFSymbol::size() const {
+ return COFF::SymbolSize + (Data.NumberOfAuxSymbols * COFF::SymbolSize);
+}
+
+// In the case that the name does not fit within 8 bytes, the offset
+// into the string table is stored in the last 4 bytes instead, leaving
+// the first 4 bytes as 0.
+void COFFSymbol::set_name_offset(uint32_t Offset) {
+ write_uint32_le(Data.Name + 0, 0);
+ write_uint32_le(Data.Name + 4, Offset);
+}
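Concretely, a long symbol name's 8-byte Name field holds four zero bytes followed by the little-endian string table offset. Worked out for an offset of 4, the first slot after the length header:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      // Illustrative: the 8-byte symbol Name field for a long name whose
      // text lives at string table offset 4.
      uint8_t Name[8];
      memset(Name, 0, 4);                    // first 4 bytes: zero marker
      uint32_t Offset = 4;                   // written little-endian
      Name[4] = Offset & 0xFF;
      Name[5] = (Offset >> 8) & 0xFF;
      Name[6] = (Offset >> 16) & 0xFF;
      Name[7] = (Offset >> 24) & 0xFF;
      assert(Name[0] == 0 && Name[4] == 4);
    }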
+
+//------------------------------------------------------------------------------
+// Section class implementation
+
+COFFSection::COFFSection(llvm::StringRef name, size_t Index)
+ : Name(name), Number(Index + 1)
+ , MCData(NULL), Symb(NULL) {
+ memset(&Header, 0, sizeof(Header));
+}
+
+size_t COFFSection::size() {
+ return COFF::SectionSize;
+}
+
+//------------------------------------------------------------------------------
+// StringTable class implementation
+
+/// Write the length of the string table into Data.
+/// The length of the string table includes uint32 length header.
+void StringTable::update_length() {
+ write_uint32_le(&Data.front(), Data.size());
+}
+
+StringTable::StringTable() {
+ // The string table data begins with the length of the entire string table
+ // including the length header. Allocate space for this header.
+ Data.resize(4);
+}
+
+size_t StringTable::size() const {
+ return Data.size();
+}
+
+/// Add String to the table iff it is not already there.
+/// @returns the offset into the string table where the string is now located.
+size_t StringTable::insert(llvm::StringRef String) {
+ map::iterator i = Map.find(String);
+
+ if (i != Map.end())
+ return i->second;
+
+ size_t Offset = Data.size();
- // MCObjectWriter interface implementation.
+ // Insert string data into string table.
+ Data.insert(Data.end(), String.begin(), String.end());
+ Data.push_back('\0');
- void ExecutePostLayoutBinding(MCAssembler &Asm);
+ // Put a reference to it in the map.
+ Map[String] = Offset;
- void RecordRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup,
- MCValue Target,
- uint64_t &FixedValue);
+ // Update the internal length field.
+ update_length();
- void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
- };
+ return Offset;
}
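The resulting table is self-describing: bytes 0 through 3 hold the total length, header included, and each inserted string is null terminated. A sketch of the layout after inserting one 15-character string, which lands at offset 4 and grows the table to 20 bytes:

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <string>
    #include <vector>

    // Illustrative model of the COFF string table: a 4-byte length header
    // followed by NUL-terminated strings.
    int main() {
      std::vector<char> Data(4, 0);           // reserve the length header
      std::string S = "longsectionname";      // 15 characters
      size_t Offset = Data.size();            // 4: the first string slot
      Data.insert(Data.end(), S.begin(), S.end());
      Data.push_back('\0');
      uint32_t Len = Data.size();             // 4 + 15 + 1 = 20
      memcpy(&Data[0], &Len, 4);              // host-endian here; the real
                                              // writer forces little-endian
      assert(Offset == 4 && Len == 20);
    }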
-WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS)
- : MCObjectWriter(OS, true) {
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter class implementation
+
+WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit)
+ : MCObjectWriter(OS, true)
+ , Is64Bit(is64Bit) {
+ memset(&Header, 0, sizeof(Header));
+
+ Header.Machine = Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
+                          : COFF::IMAGE_FILE_MACHINE_I386;
+}
+
+WinCOFFObjectWriter::~WinCOFFObjectWriter() {
+ for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I)
+ delete *I;
+ for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I)
+ delete *I;
+}
+
+COFFSymbol *WinCOFFObjectWriter::createSymbol(llvm::StringRef Name) {
+ return createCOFFEntity<COFFSymbol>(Name, Symbols);
+}
+
+COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
+ return createCOFFEntity<COFFSection>(Name, Sections);
+}
+
+/// This function initializes a symbol by entering its name into the string
+/// table if it is too long to fit in the symbol table header.
+void WinCOFFObjectWriter::InitCOFFEntity(COFFSymbol &S) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ S.set_name_offset(StringTableEntry);
+ } else
+ memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+}
+
+/// This function initializes a section by entering its name into the string
+/// table if it is too long to fit in the section table header.
+void WinCOFFObjectWriter::InitCOFFEntity(COFFSection &S) {
+ if (S.Name.size() > COFF::NameSize) {
+ size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+ // FIXME: The 999999 limit is never mentioned in the spec; it appears to
+ // exist so that the printed "/offset" string (a slash, at most six digits,
+ // and a null terminator) fits within the 8-byte S.Header.Name field.
+ if (StringTableEntry > 999999)
+ report_fatal_error("COFF string table is greater than 999999 bytes.");
+
+ sprintf(S.Header.Name, "/%u", (unsigned)StringTableEntry);
+ } else
+ memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
+}
+
+/// A template used to lookup or create a symbol/section, and initialize it if
+/// needed.
+template <typename object_t, typename list_t>
+object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
+ list_t &List) {
+ object_t *Object = new object_t(Name, List.size());
+
+ InitCOFFEntity(*Object);
+
+ List.push_back(Object);
+
+ return Object;
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF section staging object.
+void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
+ // FIXME: Not sure how to verify this (at least in a debug build).
+ MCSectionCOFF const &Sec =
+ static_cast<MCSectionCOFF const &>(SectionData.getSection());
+
+ COFFSection *coff_section = createSection(Sec.getSectionName());
+ COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
+
+ coff_section->Symb = coff_symbol;
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
+ coff_symbol->Data.SectionNumber = coff_section->Number;
+
+ // In this case the auxiliary symbol is a Section Definition.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATSectionDefinition;
+ coff_symbol->Aux[0].Aux.SectionDefinition.Number = coff_section->Number;
+ coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
+
+ coff_section->Header.Characteristics = Sec.getCharacteristics();
+
+ uint32_t &Characteristics = coff_section->Header.Characteristics;
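+ // COFF stores section alignment as log2(alignment) + 1 in bits 20..23 of
+ // the section characteristics, so each supported power of two maps to its
+ // own IMAGE_SCN_ALIGN_* value.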
+ switch (SectionData.getAlignment()) {
+ case 1: Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES; break;
+ case 2: Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES; break;
+ case 4: Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES; break;
+ case 8: Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES; break;
+ case 16: Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES; break;
+ case 32: Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES; break;
+ case 64: Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES; break;
+ case 128: Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES; break;
+ case 256: Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES; break;
+ case 512: Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES; break;
+ case 1024: Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES; break;
+ case 2048: Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES; break;
+ case 4096: Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES; break;
+ case 8192: Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES; break;
+ default:
+ llvm_unreachable("unsupported section alignment");
+ }
+
+ // Bind internal COFF section to MC section.
+ coff_section->MCData = &SectionData;
+ SectionMap[&SectionData] = coff_section;
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF symbol staging object.
+void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
+ MCAssembler &Assembler) {
+ COFFSymbol *coff_symbol = createSymbol(SymbolData.getSymbol().getName());
+
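+ // The streamer packs the COFF symbol type into the low 16 bits of the MC
+ // symbol flags and the storage class into the byte above them; unpack both
+ // here.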
+ coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0;
+ coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+
+ // If no storage class was specified in the streamer, define it here.
+ if (coff_symbol->Data.StorageClass == 0) {
+ bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
+
+ coff_symbol->Data.StorageClass =
+ external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+ }
+
+ if (SymbolData.getFlags() & COFF::SF_WeakReference) {
+ coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+ const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+
+ // FIXME: This assert message isn't very good.
+ assert(Value->getKind() == MCExpr::SymbolRef &&
+ "Value must be a SymbolRef!");
+
+ const MCSymbolRefExpr *SymbolRef =
+ static_cast<const MCSymbolRefExpr *>(Value);
+
+ const MCSymbolData &OtherSymbolData =
+ Assembler.getSymbolData(SymbolRef->getSymbol());
+
+ // FIXME: This assert message isn't very good.
+ assert(SymbolMap.find(&OtherSymbolData) != SymbolMap.end() &&
+ "OtherSymbolData must be in the symbol map!");
+
+ coff_symbol->Other = SymbolMap[&OtherSymbolData];
+
+ // Setup the Weak External auxiliary symbol.
+ coff_symbol->Aux.resize(1);
+ memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+ coff_symbol->Aux[0].AuxType = ATWeakExternal;
+ coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
+ coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
+ COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+ }
+
+ // Bind internal COFF symbol to MC symbol.
+ coff_symbol->MCData = &SymbolData;
+ SymbolMap[&SymbolData] = coff_symbol;
+}
+
+bool WinCOFFObjectWriter::ExportSection(COFFSection *S) {
+ return (S->Header.Characteristics
+ & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
+}
+
+bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
+ MCAssembler &Asm) {
+ // Asm.isSymbolLinkerVisible(&SymbolData) looks like the natural test here,
+ // but it is not obviously right: strings referenced from the .data section
+ // still need symbols so that code in the .text section can be linked
+ // against them. For now, export all symbols and let the linker sort it out.
+ return true;
+}
+
+//------------------------------------------------------------------------------
+// entity writing methods
+
+void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
+ WriteLE16(Header.Machine);
+ WriteLE16(Header.NumberOfSections);
+ WriteLE32(Header.TimeDateStamp);
+ WriteLE32(Header.PointerToSymbolTable);
+ WriteLE32(Header.NumberOfSymbols);
+ WriteLE16(Header.SizeOfOptionalHeader);
+ WriteLE16(Header.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) {
+ WriteBytes(StringRef(S->Data.Name, COFF::NameSize));
+ WriteLE32(S->Data.Value);
+ WriteLE16(S->Data.SectionNumber);
+ WriteLE16(S->Data.Type);
+ Write8(S->Data.StorageClass);
+ Write8(S->Data.NumberOfAuxSymbols);
+ WriteAuxiliarySymbols(S->Aux);
+}
+
+void WinCOFFObjectWriter::WriteAuxiliarySymbols(
+ const COFFSymbol::AuxiliarySymbols &S) {
+ for(COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end();
+ i != e; ++i) {
+ switch(i->AuxType) {
+ case ATFunctionDefinition:
+ WriteLE32(i->Aux.FunctionDefinition.TagIndex);
+ WriteLE32(i->Aux.FunctionDefinition.TotalSize);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber);
+ WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.FunctionDefinition.unused));
+ break;
+ case ATbfAndefSymbol:
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1));
+ WriteLE16(i->Aux.bfAndefSymbol.Linenumber);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2));
+ WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction);
+ WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3));
+ break;
+ case ATWeakExternal:
+ WriteLE32(i->Aux.WeakExternal.TagIndex);
+ WriteLE32(i->Aux.WeakExternal.Characteristics);
+ WriteZeros(sizeof(i->Aux.WeakExternal.unused));
+ break;
+ case ATFile:
+ WriteBytes(StringRef(reinterpret_cast<const char *>(i->Aux.File.FileName),
+ sizeof(i->Aux.File.FileName)));
+ break;
+ case ATSectionDefinition:
+ WriteLE32(i->Aux.SectionDefinition.Length);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations);
+ WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers);
+ WriteLE32(i->Aux.SectionDefinition.CheckSum);
+ WriteLE16(i->Aux.SectionDefinition.Number);
+ Write8(i->Aux.SectionDefinition.Selection);
+ WriteZeros(sizeof(i->Aux.SectionDefinition.unused));
+ break;
+ }
+ }
+}
+
+void WinCOFFObjectWriter::WriteSectionHeader(const COFF::section &S) {
+ WriteBytes(StringRef(S.Name, COFF::NameSize));
+
+ WriteLE32(S.VirtualSize);
+ WriteLE32(S.VirtualAddress);
+ WriteLE32(S.SizeOfRawData);
+ WriteLE32(S.PointerToRawData);
+ WriteLE32(S.PointerToRelocations);
+ WriteLE32(S.PointerToLineNumbers);
+ WriteLE16(S.NumberOfRelocations);
+ WriteLE16(S.NumberOfLineNumbers);
+ WriteLE32(S.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
+ WriteLE32(R.VirtualAddress);
+ WriteLE32(R.SymbolTableIndex);
+ WriteLE16(R.Type);
}
////////////////////////////////////////////////////////////////////////////////
// MCObjectWriter interface implementations
void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
+ // "Define" each section & symbol. This creates section & symbol
+ // entries in the staging area and gives them their final indexes.
+
+ for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++)
+ DefineSection(*i);
+
+ for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
+ e = Asm.symbol_end(); i != e; i++) {
+ if (ExportSymbol(*i, Asm))
+ DefineSymbol(*i, Asm);
+ }
}
void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
@@ -55,17 +566,209 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCFixup &Fixup,
MCValue Target,
uint64_t &FixedValue) {
+ assert(Target.getSymA() != NULL && "Relocation must reference a symbol!");
+
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+
+ MCSectionData const *SectionData = Fragment->getParent();
+
+ // Mark this symbol as requiring an entry in the symbol table.
+ assert(SectionMap.find(SectionData) != SectionMap.end() &&
+ "Section must already have been defined in ExecutePostLayoutBinding!");
+ assert(SymbolMap.find(&A_SD) != SymbolMap.end() &&
+ "Symbol must already have been defined in ExecutePostLayoutBinding!");
+
+ COFFSection *coff_section = SectionMap[SectionData];
+ COFFSymbol *coff_symbol = SymbolMap[&A_SD];
+
+ if (Target.getSymB()) {
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+
+ // When both SymA and SymB are present we only need the delta between the
+ // two symbols; fold it into FixedValue and skip recording a relocation.
+ FixedValue = Layout.getSymbolAddress(&A_SD) -
+ Layout.getSymbolAddress(&B_SD);
+ return;
+ } else {
+ FixedValue = Target.getConstant();
+ }
+
+ COFFRelocation Reloc;
+
+ Reloc.Data.SymbolTableIndex = 0;
+ Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
+ Reloc.Symb = coff_symbol;
+
+ Reloc.Data.VirtualAddress += Fixup.getOffset();
+
+ switch (Fixup.getKind()) {
+ case X86::reloc_pcrel_4byte:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32
+ : COFF::IMAGE_REL_I386_REL32;
+ // COFF PC-relative relocations are resolved relative to the end of the
+ // 4-byte field being fixed up, while the fixup value is computed relative
+ // to its start, so bias FixedValue by the width of the field.
+ FixedValue += 4;
+ break;
+ case FK_Data_4:
+ Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32
+ : COFF::IMAGE_REL_I386_DIR32;
+ break;
+ case FK_Data_8:
+ if (Is64Bit)
+ Reloc.Data.Type = COFF::IMAGE_REL_AMD64_ADDR64;
+ else
+ llvm_unreachable("unsupported relocation type");
+ break;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+
+ coff_section->Relocations.push_back(Reloc);
}
void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
const MCAsmLayout &Layout) {
+ // Assign symbol and section indexes and offsets.
+
+ Header.NumberOfSymbols = 0;
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *coff_symbol = *i;
+ MCSymbolData const *SymbolData = coff_symbol->MCData;
+
+ coff_symbol->Index = Header.NumberOfSymbols++;
+
+ // Update section number & offset for symbols that have them.
+ if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
+ COFFSection *coff_section = SectionMap[SymbolData->Fragment->getParent()];
+
+ coff_symbol->Data.SectionNumber = coff_section->Number;
+ coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
+ + SymbolData->Offset;
+ }
+
+ // Update auxiliary symbol info.
+ coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
+ Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+ }
+
+ // Fixup weak external references.
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+ COFFSymbol *symb = *i;
+
+ if (symb->Other != NULL) {
+ assert(symb->Aux.size() == 1 &&
+ "Symbol must contain one aux symbol!");
+ assert(symb->Aux[0].AuxType == ATWeakExternal &&
+ "Symbol's aux symbol must be a Weak External!");
+ symb->Aux[0].Aux.WeakExternal.TagIndex = symb->Other->Index;
+ }
+ }
+
+ // Assign file offsets to COFF object file structures.
+
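+ // The file is laid out as: file header, section table, each exported
+ // section's raw data followed by its relocations, then the symbol table
+ // and finally the string table.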
+ unsigned offset = 0;
+
+ offset += COFF::HeaderSize;
+ offset += COFF::SectionSize * Asm.size();
+
+ Header.NumberOfSections = Sections.size();
+
+ for (MCAssembler::const_iterator i = Asm.begin(),
+ e = Asm.end();
+ i != e; i++) {
+ COFFSection *Sec = SectionMap[i];
+
+ Sec->Header.SizeOfRawData = Layout.getSectionFileSize(i);
+
+ if (ExportSection(Sec)) {
+ Sec->Header.PointerToRawData = offset;
+
+ offset += Sec->Header.SizeOfRawData;
+ }
+
+ if (Sec->Relocations.size() > 0) {
+ Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+ Sec->Header.PointerToRelocations = offset;
+
+ offset += COFF::RelocationSize * Sec->Relocations.size();
+
+ for (relocations::iterator cr = Sec->Relocations.begin(),
+ er = Sec->Relocations.end();
+ cr != er; cr++) {
+ (*cr).Data.SymbolTableIndex = (*cr).Symb->Index;
+ }
+ }
+
+ assert(Sec->Symb->Aux.size() == 1 && "Section's symbol must have one aux!");
+ AuxSymbol &Aux = Sec->Symb->Aux[0];
+ assert(Aux.AuxType == ATSectionDefinition &&
+ "Section's symbol's aux symbol must be a Section Definition!");
+ Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
+ Aux.Aux.SectionDefinition.NumberOfRelocations =
+ Sec->Header.NumberOfRelocations;
+ Aux.Aux.SectionDefinition.NumberOfLinenumbers =
+ Sec->Header.NumberOfLineNumbers;
+ }
+
+ Header.PointerToSymbolTable = offset;
+
+ Header.TimeDateStamp = sys::TimeValue::now().toEpochTime();
+
+ // Write it all to disk...
+ WriteFileHeader(Header);
+
+ {
+ sections::iterator i, ie;
+ MCAssembler::const_iterator j, je;
+
+ for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
+ WriteSectionHeader((*i)->Header);
+
+ for (i = Sections.begin(), ie = Sections.end(),
+ j = Asm.begin(), je = Asm.end();
+ (i != ie) && (j != je); i++, j++) {
+ if ((*i)->Header.PointerToRawData != 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRawData &&
+ "Section::PointerToRawData is insane!");
+
+ Asm.WriteSectionData(j, Layout, this);
+ }
+
+ if ((*i)->Relocations.size() > 0) {
+ assert(OS.tell() == (*i)->Header.PointerToRelocations &&
+ "Section::PointerToRelocations is insane!");
+
+ for (relocations::const_iterator k = (*i)->Relocations.begin(),
+ ke = (*i)->Relocations.end();
+ k != ke; k++) {
+ WriteRelocation(k->Data);
+ }
+ } else
+ assert((*i)->Header.PointerToRelocations == 0 &&
+ "Section::PointerToRelocations is insane!");
+ }
+ }
+
+ assert(OS.tell() == Header.PointerToSymbolTable &&
+ "Header::PointerToSymbolTable is insane!");
+
+ for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++)
+ WriteSymbol(*i);
+
+ OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
}
//------------------------------------------------------------------------------
// WinCOFFObjectWriter factory function
namespace llvm {
- MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS) {
- return new WinCOFFObjectWriter(OS);
+ MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) {
+ return new WinCOFFObjectWriter(OS, is64Bit);
}
}
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 1030cdb28d2cd..8a194bff21513 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -18,27 +18,34 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/ADT/StringMap.h"
+
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-#define dbg_notimpl(x) \
- do { dbgs() << "not implemented, " << __FUNCTION__ << " (" << x << ")"; \
- abort(); } while (false);
-
namespace {
class WinCOFFStreamer : public MCObjectStreamer {
public:
+ MCSymbol const *CurSymbol;
+
WinCOFFStreamer(MCContext &Context,
TargetAsmBackend &TAB,
MCCodeEmitter &CE,
raw_ostream &OS);
+ void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External);
+
// MCStreamer interface
virtual void EmitLabel(MCSymbol *Symbol);
@@ -52,18 +59,18 @@ public:
virtual void EndCOFFSymbolDef();
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
+ unsigned ByteAlignment);
virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
- unsigned Size,unsigned ByteAlignment);
+ unsigned Size,unsigned ByteAlignment);
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment);
+ uint64_t Size, unsigned ByteAlignment);
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
- virtual void EmitValue(const MCExpr *Value, unsigned Size,
+ virtual void EmitValue(const MCExpr *Value, unsigned Size,
unsigned AddrSpace);
virtual void EmitGPRel32Value(const MCExpr *Value);
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
- unsigned ValueSize, unsigned MaxBytesToEmit);
+ unsigned ValueSize, unsigned MaxBytesToEmit);
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit);
virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
@@ -78,96 +85,224 @@ WinCOFFStreamer::WinCOFFStreamer(MCContext &Context,
TargetAsmBackend &TAB,
MCCodeEmitter &CE,
raw_ostream &OS)
- : MCObjectStreamer(Context, TAB, OS, &CE) {
+ : MCObjectStreamer(Context, TAB, OS, &CE)
+ , CurSymbol(NULL) {
+}
+
+void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment, bool External) {
+ assert(!Symbol->isInSection() && "Symbol must not already have a section!");
+
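+ // Model the common symbol as a COMDAT .bss section unique to this symbol
+ // (e.g. "foo" lives in ".bss$linkoncefoo"), selected by "largest" so the
+ // linker keeps the biggest definition it sees.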
+ std::string SectionName(".bss$linkonce");
+ SectionName.append(Symbol->getName().begin(), Symbol->getName().end());
+
+ MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ unsigned Characteristics =
+ COFF::IMAGE_SCN_LNK_COMDAT |
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
+
+ int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST;
+
+ const MCSection *Section = MCStreamer::getContext().getCOFFSection(
+ SectionName, Characteristics, Selection, SectionKind::getBSS());
+
+ MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
+
+ if (SectionData.getAlignment() < ByteAlignment)
+ SectionData.setAlignment(ByteAlignment);
+
+ SymbolData.setExternal(External);
+
+ Symbol->setSection(*Section);
+
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData);
+
+ SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData));
}
// MCStreamer interface
void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
+ // TODO: This is copied almost exactly from the MachOStreamer. Consider
+ // merging into MCObjectStreamer?
+ assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(CurSection && "Cannot emit before setting section!");
+
+ Symbol->setSection(*CurSection);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+ // FIXME: This is wasteful, we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if it exists, otherwise we should just queue the label and set its
+ // fragment pointer when we emit the next fragment.
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
+ SD.setFragment(DF);
+ SD.setOffset(DF->getContents().size());
}
void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
- dbg_notimpl("Flag = " << Flag);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ // TODO: This is exactly the same as MachOStreamer. Consider merging into
+ // MCObjectStreamer.
+ getAssembler().getOrCreateSymbolData(*Symbol);
+ AddValueSymbols(Value);
+ Symbol->setVariableValue(Value);
}
void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
+ switch (Attribute) {
+ case MCSA_WeakReference:
+ getAssembler().getOrCreateSymbolData(*Symbol).modifyFlags(
+ COFF::SF_WeakReference,
+ COFF::SF_WeakReference);
+ break;
+
+ case MCSA_Global:
+ getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true);
+ break;
+
+ default:
+ llvm_unreachable("unsupported attribute");
+ break;
+ }
}
void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
- dbg_notimpl("Symbol = " << Symbol->getName() << ", DescValue = "<< DescValue);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+ assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls "
+ "to BeginCOFFSymbolDef!");
+ CurSymbol = Symbol;
}
void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in "
+ "the first byte!");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ StorageClass << COFF::SF_ClassShift,
+ COFF::SF_ClassMask);
}
void WinCOFFStreamer::EmitCOFFSymbolType(int Type) {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 "
+ "bytes");
+
+ getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+ Type << COFF::SF_TypeShift,
+ COFF::SF_TypeMask);
}
void WinCOFFStreamer::EndCOFFSymbolDef() {
+ assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+ CurSymbol = NULL;
}
void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- dbg_notimpl("Symbol = " << Symbol->getName() << ", Value = " << *Value);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
+ unsigned ByteAlignment) {
+ AddCommonSymbol(Symbol, Size, ByteAlignment, true);
}
void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ AddCommonSymbol(Symbol, Size, 1, false);
}
void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
- unsigned Size,unsigned ByteAlignment) {
- MCSectionCOFF const *SectionCOFF =
- static_cast<MCSectionCOFF const *>(Section);
-
- dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " <<
- Symbol->getName() << ", Size = " << Size << ", ByteAlignment = "
- << ByteAlignment);
+ unsigned Size,unsigned ByteAlignment) {
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
- MCSectionCOFF const *SectionCOFF =
- static_cast<MCSectionCOFF const *>(Section);
-
- dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " <<
- Symbol->getName() << ", Size = " << Size << ", ByteAlignment = "
- << ByteAlignment);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size,
- unsigned AddrSpace) {
+ unsigned AddrSpace) {
+ assert(AddrSpace == 0 && "Address space must be 0!");
+
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ MCDataFragment *DF = getOrCreateDataFragment();
+
+ // Avoid fixups when possible.
+ int64_t AbsValue;
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
+ // FIXME: Endianness assumption.
+ for (unsigned i = 0; i != Size; ++i)
+ DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
+ } else {
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ AddValueSymbols(Value),
+ MCFixup::getKindForSize(Size)));
+ DF->getContents().resize(DF->getContents().size() + Size, 0);
+ }
}
void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) {
- dbg_notimpl("Value = '" << *Value);
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
- int64_t Value,
- unsigned ValueSize,
- unsigned MaxBytesToEmit) {
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
}
void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit = 0) {
+ unsigned MaxBytesToEmit) {
+ // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+ // MCObjectStreamer?
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ getCurrentSectionData());
+ F->setEmitNops(true);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
}
void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0) {
- dbg_notimpl("Offset = '" << *Offset << "', Value = " << Value);
+ unsigned char Value) {
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
@@ -176,11 +311,24 @@ void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
}
void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo,
- StringRef Filename) {
- dbg_notimpl("FileNo = " << FileNo << ", Filename = '" << Filename << "'");
+ StringRef Filename) {
+ llvm_unreachable("not implemented");
}
void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) {
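+ // Record any symbols referenced by the instruction's operands, then encode
+ // the instruction into its own MCInstFragment so it can be relaxed later if
+ // needed.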
+ for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i)
+ if (Instruction.getOperand(i).isExpr())
+ AddValueSymbols(Instruction.getOperand(i).getExpr());
+
+ getCurrentSectionData()->setHasInstructions(true);
+
+ MCInstFragment *Fragment =
+ new MCInstFragment(Instruction, getCurrentSectionData());
+
+ raw_svector_ostream VecOS(Fragment->getCode());
+
+ getAssembler().getEmitter().EncodeInstruction(Instruction, VecOS,
+ Fragment->getFixups());
}
void WinCOFFStreamer::Finish() {
@@ -192,7 +340,10 @@ namespace llvm
MCStreamer *createWinCOFFStreamer(MCContext &Context,
TargetAsmBackend &TAB,
MCCodeEmitter &CE,
- raw_ostream &OS) {
- return new WinCOFFStreamer(Context, TAB, CE, OS);
+ raw_ostream &OS,
+ bool RelaxAll) {
+ WinCOFFStreamer *S = new WinCOFFStreamer(Context, TAB, CE, OS);
+ S->getAssembler().setRelaxAll(RelaxAll);
+ return S;
}
}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 2e78557011331..b87ddf9c95b58 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -153,6 +153,7 @@ readExponent(StringRef::iterator begin, StringRef::iterator end)
value += absExponent * 10;
if (absExponent >= overlargeExponent) {
absExponent = overlargeExponent;
+ p = end; /* outwit assert below */
break;
}
absExponent = value;
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 262fa42ab2ced..8a212a291f24d 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -2123,15 +2123,16 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
char *BufPtr = Buffer+65;
uint64_t N;
- if (Signed) {
+ if (!Signed) {
+ N = getZExtValue();
+ } else {
int64_t I = getSExtValue();
- if (I < 0) {
+ if (I >= 0) {
+ N = I;
+ } else {
Str.push_back('-');
- I = -I;
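+ // Negate in the unsigned domain so that INT64_MIN does not overflow.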
+ N = -(uint64_t)I;
}
- N = I;
- } else {
- N = getZExtValue();
}
while (N) {
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 366d2f799211d..0c70a402654e5 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMSupport
circular_raw_ostream.cpp
CommandLine.cpp
ConstantRange.cpp
+ CrashRecoveryContext.cpp
Debug.cpp
DeltaAlgorithm.cpp
DAGDeltaAlgorithm.cpp
@@ -23,7 +24,6 @@ add_llvm_library(LLVMSupport
PluginLoader.cpp
PrettyStackTrace.cpp
Regex.cpp
- SlowOperationInformer.cpp
SmallPtrSet.cpp
SmallVector.cpp
SourceMgr.cpp
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 2746f7aaaa5e8..8ef3785f53318 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -21,6 +21,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Constants.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -38,7 +39,7 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) {
/// Initialize a range to hold the single specified value.
///
-ConstantRange::ConstantRange(const APInt & V) : Lower(V), Upper(V + 1) {}
+ConstantRange::ConstantRange(const APInt &V) : Lower(V), Upper(V + 1) {}
ConstantRange::ConstantRange(const APInt &L, const APInt &U) :
Lower(L), Upper(U) {
@@ -202,14 +203,12 @@ bool ConstantRange::contains(const APInt &V) const {
}
/// contains - Return true if the argument is a subset of this range.
-/// Two equal set contain each other. The empty set is considered to be
-/// contained by all other sets.
+/// Two equal sets contain each other. The empty set is contained by all
+/// other sets.
///
bool ConstantRange::contains(const ConstantRange &Other) const {
- if (isFullSet()) return true;
- if (Other.isFullSet()) return false;
- if (Other.isEmptySet()) return true;
- if (isEmptySet()) return false;
+ if (isFullSet() || Other.isEmptySet()) return true;
+ if (isEmptySet() || Other.isFullSet()) return false;
if (!isWrappedSet()) {
if (Other.isWrappedSet())
@@ -235,46 +234,6 @@ ConstantRange ConstantRange::subtract(const APInt &Val) const {
return ConstantRange(Lower - Val, Upper - Val);
}
-
-// intersect1Wrapped - This helper function is used to intersect two ranges when
-// it is known that LHS is wrapped and RHS isn't.
-//
-ConstantRange
-ConstantRange::intersect1Wrapped(const ConstantRange &LHS,
- const ConstantRange &RHS) {
- assert(LHS.isWrappedSet() && !RHS.isWrappedSet());
-
- // Check to see if we overlap on the Left side of RHS...
- //
- if (RHS.Lower.ult(LHS.Upper)) {
- // We do overlap on the left side of RHS, see if we overlap on the right of
- // RHS...
- if (RHS.Upper.ugt(LHS.Lower)) {
- // Ok, the result overlaps on both the left and right sides. See if the
- // resultant interval will be smaller if we wrap or not...
- //
- if (LHS.getSetSize().ult(RHS.getSetSize()))
- return LHS;
- else
- return RHS;
-
- } else {
- // No overlap on the right, just on the left.
- return ConstantRange(RHS.Lower, LHS.Upper);
- }
- } else {
- // We don't overlap on the left side of RHS, see if we overlap on the right
- // of RHS...
- if (RHS.Upper.ugt(LHS.Lower)) {
- // Simple overlap...
- return ConstantRange(LHS.Lower, RHS.Upper);
- } else {
- // No overlap...
- return ConstantRange(LHS.getBitWidth(), false);
- }
- }
-}
-
/// intersectWith - Return the range that results from the intersection of this
/// range with another range. The resultant range is guaranteed to include all
/// elements contained in both input ranges, and to have the smallest possible
@@ -486,7 +445,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
assert(SrcTySize > DstTySize && "Not a value truncation");
APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize));
if (isFullSet() || getSetSize().ugt(Size))
- return ConstantRange(DstTySize);
+ return ConstantRange(DstTySize, /*isFullSet=*/true);
APInt L = Lower; L.trunc(DstTySize);
APInt U = Upper; U.trunc(DstTySize);
@@ -539,6 +498,27 @@ ConstantRange::add(const ConstantRange &Other) const {
}
ConstantRange
+ConstantRange::sub(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ if (isFullSet() || Other.isFullSet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
+ APInt NewLower = getLower() - Other.getUpper() + 1;
+ APInt NewUpper = getUpper() - Other.getLower();
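+ // e.g. [10, 20) - [1, 3) yields [10 - 3 + 1, 20 - 1) = [8, 19).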
+ if (NewLower == NewUpper)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ ConstantRange X = ConstantRange(NewLower, NewUpper);
+ if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y))
+ // We've wrapped, therefore, full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+
+ return X;
+}
+
+ConstantRange
ConstantRange::multiply(const ConstantRange &Other) const {
// TODO: If either operand is a single element and the multiply is known to
// be non-wrapping, round the result min and max value to the appropriate
@@ -616,40 +596,42 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
}
ConstantRange
-ConstantRange::shl(const ConstantRange &Amount) const {
- if (isEmptySet())
- return *this;
+ConstantRange::shl(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
- APInt min = getUnsignedMin() << Amount.getUnsignedMin();
- APInt max = getUnsignedMax() << Amount.getUnsignedMax();
+ APInt min = getUnsignedMin().shl(Other.getUnsignedMin());
+ APInt max = getUnsignedMax().shl(Other.getUnsignedMax());
// there's no overflow!
APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros());
- if (Zeros.uge(Amount.getUnsignedMax()))
- return ConstantRange(min, max);
+ if (Zeros.ugt(Other.getUnsignedMax()))
+ return ConstantRange(min, max + 1);
// FIXME: implement the other tricky cases
- return ConstantRange(getBitWidth());
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
}
ConstantRange
-ConstantRange::ashr(const ConstantRange &Amount) const {
- if (isEmptySet())
- return *this;
+ConstantRange::lshr(const ConstantRange &Other) const {
+ if (isEmptySet() || Other.isEmptySet())
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+
+ APInt max = getUnsignedMax().lshr(Other.getUnsignedMin());
+ APInt min = getUnsignedMin().lshr(Other.getUnsignedMax());
+ if (min == max + 1)
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
- APInt min = getUnsignedMax().ashr(Amount.getUnsignedMin());
- APInt max = getUnsignedMin().ashr(Amount.getUnsignedMax());
- return ConstantRange(min, max);
+ return ConstantRange(min, max + 1);
}
-ConstantRange
-ConstantRange::lshr(const ConstantRange &Amount) const {
- if (isEmptySet())
- return *this;
-
- APInt min = getUnsignedMax().lshr(Amount.getUnsignedMin());
- APInt max = getUnsignedMin().lshr(Amount.getUnsignedMax());
- return ConstantRange(min, max);
+ConstantRange ConstantRange::inverse() const {
+ if (isFullSet()) {
+ return ConstantRange(getBitWidth(), /*isFullSet=*/false);
+ } else if (isEmptySet()) {
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ }
+ return ConstantRange(Upper, Lower);
}
/// print - Print out the bounds to a stream...
@@ -668,5 +650,3 @@ void ConstantRange::print(raw_ostream &OS) const {
void ConstantRange::dump() const {
print(dbgs());
}
-
-
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
new file mode 100644
index 0000000000000..49258ede83c1d
--- /dev/null
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -0,0 +1,204 @@
+//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/ThreadLocal.h"
+#include <setjmp.h>
+#include <cstdio>
+using namespace llvm;
+
+namespace {
+
+struct CrashRecoveryContextImpl;
+
+static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext;
+
+struct CrashRecoveryContextImpl {
+ CrashRecoveryContext *CRC;
+ std::string Backtrace;
+ ::jmp_buf JumpBuffer;
+ volatile unsigned Failed : 1;
+
+public:
+ CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
+ Failed(false) {
+ CurrentContext.set(this);
+ }
+ ~CrashRecoveryContextImpl() {
+ CurrentContext.erase();
+ }
+
+ void HandleCrash() {
+ // Eliminate the current context entry, to avoid re-entering in case the
+ // cleanup code crashes.
+ CurrentContext.erase();
+
+ assert(!Failed && "Crash recovery context already failed!");
+ Failed = true;
+
+ // FIXME: Stash the backtrace.
+
+ // Jump back to the RunSafely we were called under.
+ longjmp(JumpBuffer, 1);
+ }
+};
+
+}
+
+static sys::Mutex gCrashRecoveryContexMutex;
+static bool gCrashRecoveryEnabled = false;
+
+CrashRecoveryContext::~CrashRecoveryContext() {
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ delete CRCI;
+}
+
+CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+ if (!CRCI)
+ return 0;
+
+ return CRCI->CRC;
+}
+
+#ifdef LLVM_ON_WIN32
+
+// FIXME: No real Win32 implementation currently.
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+}
+
+#else
+
+// Generic POSIX implementation.
+//
+// This implementation relies on synchronous signals being delivered to the
+// current thread. We use a thread local object to keep track of the active
+// crash recovery context, and install signal handlers to invoke HandleCrash on
+// the active object.
+//
+// This implementation does not attempt to chain signal handlers in any
+// reliable fashion -- if we get a signal outside of a crash recovery context
+// we simply disable crash recovery and raise the signal again.
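+//
+// Illustrative use (DoRiskyWork stands in for arbitrary client code):
+//
+//   CrashRecoveryContext::Enable();
+//   CrashRecoveryContext CRC;
+//   if (!CRC.RunSafely(DoRiskyWork, &Data))
+//     errs() << "crashed; backtrace: " << CRC.getBacktrace() << "\n";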
+
+#include <signal.h>
+
+static int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
+static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]);
+static struct sigaction PrevActions[NumSignals];
+
+static void CrashRecoverySignalHandler(int Signal) {
+ // Lookup the current thread local recovery object.
+ const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+
+ if (!CRCI) {
+ // We didn't find a crash recovery context -- this means either we got a
+ // signal on a thread we didn't expect it on, the application got a signal
+ // outside of a crash recovery context, or something else went horribly
+ // wrong.
+ //
+ // Disable crash recovery and raise the signal again. The assumption here is
+ // that the enclosing application will terminate soon, and we won't want to
+ // attempt crash recovery again.
+ //
+ // This call to Disable isn't thread safe, but it doesn't actually matter.
+ CrashRecoveryContext::Disable();
+ raise(Signal);
+ }
+
+ // Unblock the signal we received.
+ sigset_t SigMask;
+ sigemptyset(&SigMask);
+ sigaddset(&SigMask, Signal);
+ sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+ if (CRCI)
+ const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+}
+
+void CrashRecoveryContext::Enable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = true;
+
+ // Setup the signal handler.
+ struct sigaction Handler;
+ Handler.sa_handler = CrashRecoverySignalHandler;
+ Handler.sa_flags = 0;
+ sigemptyset(&Handler.sa_mask);
+
+ for (unsigned i = 0; i != NumSignals; ++i) {
+ sigaction(Signals[i], &Handler, &PrevActions[i]);
+ }
+}
+
+void CrashRecoveryContext::Disable() {
+ sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+ if (!gCrashRecoveryEnabled)
+ return;
+
+ gCrashRecoveryEnabled = false;
+
+ // Restore the previous signal handlers.
+ for (unsigned i = 0; i != NumSignals; ++i)
+ sigaction(Signals[i], &PrevActions[i], 0);
+}
+
+#endif
+
+bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) {
+ // If crash recovery is disabled, do nothing.
+ if (gCrashRecoveryEnabled) {
+ assert(!Impl && "Crash recovery context already initialized!");
+ CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
+ Impl = CRCI;
+
+ if (setjmp(CRCI->JumpBuffer) != 0) {
+ return false;
+ }
+ }
+
+ Fn(UserData);
+ return true;
+}
+
+void CrashRecoveryContext::HandleCrash() {
+ CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+ assert(CRCI && "Crash recovery context never initialized!");
+ CRCI->HandleCrash();
+}
+
+const std::string &CrashRecoveryContext::getBacktrace() const {
+ CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *) Impl;
+ assert(CRC && "Crash recovery context never initialized!");
+ assert(CRC->Failed && "No crash was detected!");
+ return CRC->Backtrace;
+}
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 7e7ca9debe9a7..0b7af3e5905be 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -18,8 +18,19 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include "llvm/System/Threading.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/config.h"
#include <cassert>
#include <cstdlib>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(_MSC_VER)
+# include <io.h>
+# include <fcntl.h>
+#endif
+
using namespace llvm;
using namespace std;
@@ -39,19 +50,26 @@ void llvm::remove_fatal_error_handler() {
ErrorHandler = 0;
}
-void llvm::report_fatal_error(const char *reason) {
- report_fatal_error(Twine(reason));
+void llvm::report_fatal_error(const char *Reason) {
+ report_fatal_error(Twine(Reason));
}
-void llvm::report_fatal_error(const std::string &reason) {
- report_fatal_error(Twine(reason));
+void llvm::report_fatal_error(const std::string &Reason) {
+ report_fatal_error(Twine(Reason));
}
-void llvm::report_fatal_error(const Twine &reason) {
- if (!ErrorHandler) {
- errs() << "LLVM ERROR: " << reason << "\n";
+void llvm::report_fatal_error(const Twine &Reason) {
+ if (ErrorHandler) {
+ ErrorHandler(ErrorHandlerUserData, Reason.str());
} else {
- ErrorHandler(ErrorHandlerUserData, reason.str());
+ // Blast the result out to stderr. We don't try hard to make sure this
+ // succeeds (e.g. handling EINTR) and we can't use errs() here because
+ // raw ostreams can call report_fatal_error.
+ SmallVector<char, 64> Buffer;
+ raw_svector_ostream OS(Buffer);
+ OS << "LLVM ERROR: " << Reason << "\n";
+ StringRef MessageStr = OS.str();
+ (void)::write(2, MessageStr.data(), MessageStr.size());
}
// If we reached here, we are failing ungracefully. Run the interrupt handlers
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index b8dca334da494..29b5952208874 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -23,6 +23,37 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// FoldingSetNodeIDRef Implementation
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
+/// used to lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeIDRef::ComputeHash() const {
+ // This is adapted from SuperFastHash by Paul Hsieh.
+ unsigned Hash = static_cast<unsigned>(Size);
+ for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) {
+ unsigned Data = *BP;
+ Hash += Data & 0xFFFF;
+ unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
+ Hash = (Hash << 16) ^ Tmp;
+ Hash += Hash >> 11;
+ }
+
+ // Force "avalanching" of final 127 bits.
+ Hash ^= Hash << 3;
+ Hash += Hash >> 5;
+ Hash ^= Hash << 4;
+ Hash += Hash >> 17;
+ Hash ^= Hash << 25;
+ Hash += Hash >> 6;
+ return Hash;
+}
+
+bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
+ if (Size != RHS.Size) return false;
+ return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
+}
+
+//===----------------------------------------------------------------------===//
// FoldingSetNodeID Implementation
/// Add* - Add various data types to Bit data.
@@ -104,31 +135,19 @@ void FoldingSetNodeID::AddString(StringRef String) {
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
/// lookup the node in the FoldingSetImpl.
unsigned FoldingSetNodeID::ComputeHash() const {
- // This is adapted from SuperFastHash by Paul Hsieh.
- unsigned Hash = static_cast<unsigned>(Bits.size());
- for (const unsigned *BP = &Bits[0], *E = BP+Bits.size(); BP != E; ++BP) {
- unsigned Data = *BP;
- Hash += Data & 0xFFFF;
- unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
- Hash = (Hash << 16) ^ Tmp;
- Hash += Hash >> 11;
- }
-
- // Force "avalanching" of final 127 bits.
- Hash ^= Hash << 3;
- Hash += Hash >> 5;
- Hash ^= Hash << 4;
- Hash += Hash >> 17;
- Hash ^= Hash << 25;
- Hash += Hash >> 6;
- return Hash;
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
}
/// operator== - Used to compare two nodes to each other.
///
bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{
- if (Bits.size() != RHS.Bits.size()) return false;
- return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0;
+ return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
+ return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
}
/// Intern - Copy this node's data to a memory region allocated from the
@@ -168,10 +187,9 @@ static void **GetBucketPtr(void *NextInBucketPtr) {
/// GetBucketFor - Hash the specified node ID and return the hash bucket for
/// the specified ID.
-static void **GetBucketFor(const FoldingSetNodeID &ID,
- void **Buckets, unsigned NumBuckets) {
+static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
// NumBuckets is always a power of 2.
- unsigned BucketNum = ID.ComputeHash() & (NumBuckets-1);
+ unsigned BucketNum = Hash & (NumBuckets-1);
return Buckets + BucketNum;
}
@@ -219,7 +237,7 @@ void FoldingSetImpl::GrowHashTable() {
NumNodes = 0;
// Walk the old buckets, rehashing nodes into their new place.
- FoldingSetNodeID ID;
+ FoldingSetNodeID TempID;
for (unsigned i = 0; i != OldNumBuckets; ++i) {
void *Probe = OldBuckets[i];
if (!Probe) continue;
@@ -229,9 +247,10 @@ void FoldingSetImpl::GrowHashTable() {
NodeInBucket->SetNextInBucket(0);
// Insert the node into the new bucket, after recomputing the hash.
- GetNodeProfile(ID, NodeInBucket);
- InsertNode(NodeInBucket, GetBucketFor(ID, Buckets, NumBuckets));
- ID.clear();
+ InsertNode(NodeInBucket,
+ GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
+ Buckets, NumBuckets));
+ TempID.clear();
}
}
@@ -245,19 +264,18 @@ FoldingSetImpl::Node
*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
void *&InsertPos) {
- void **Bucket = GetBucketFor(ID, Buckets, NumBuckets);
+ void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
void *Probe = *Bucket;
InsertPos = 0;
- FoldingSetNodeID OtherID;
+ FoldingSetNodeID TempID;
while (Node *NodeInBucket = GetNextPtr(Probe)) {
- GetNodeProfile(OtherID, NodeInBucket);
- if (OtherID == ID)
+ if (NodeEquals(NodeInBucket, ID, TempID))
return NodeInBucket;
+ TempID.clear();
Probe = NodeInBucket->getNextInBucket();
- OtherID.clear();
}
// Didn't find the node, return null with the bucket as the InsertPos.
@@ -273,9 +291,8 @@ void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
// Do we need to grow the hashtable?
if (NumNodes+1 > NumBuckets*2) {
GrowHashTable();
- FoldingSetNodeID ID;
- GetNodeProfile(ID, N);
- InsertPos = GetBucketFor(ID, Buckets, NumBuckets);
+ FoldingSetNodeID TempID;
+ InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
}
++NumNodes;
@@ -341,7 +358,7 @@ bool FoldingSetImpl::RemoveNode(Node *N) {
/// instead.
FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
FoldingSetNodeID ID;
- GetNodeProfile(ID, N);
+ GetNodeProfile(N, ID);
void *IP;
if (Node *E = FindNodeOrInsertPos(ID, IP))
return E;
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index a99ab2f30df0e..3c8a10849d149 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -72,7 +72,7 @@ asm(".desc ___crashreporter_info__, 0x10");
/// CrashHandler - This callback is run if a fatal signal is delivered to the
/// process, it prints the pretty stack trace.
-static void CrashHandler(void *Cookie) {
+static void CrashHandler(void *) {
#ifndef __APPLE__
// On non-apple systems, just emit the crash stack trace to stderr.
PrintCurStackTrace(errs());
@@ -89,7 +89,8 @@ static void CrashHandler(void *Cookie) {
#ifndef HAVE_CRASHREPORTERCLIENT_H
__crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
#else
- CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
+ // Cast to void to avoid warning.
+ (void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
#endif
errs() << TmpStr.str();
}
diff --git a/lib/Support/SlowOperationInformer.cpp b/lib/Support/SlowOperationInformer.cpp
deleted file mode 100644
index b4e9430e5fdfe..0000000000000
--- a/lib/Support/SlowOperationInformer.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-//===-- SlowOperationInformer.cpp - Keep the user informed ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SlowOperationInformer class for the LLVM debugger.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/SlowOperationInformer.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Alarm.h"
-#include <sstream>
-#include <cassert>
-using namespace llvm;
-
-SlowOperationInformer::SlowOperationInformer(const std::string &Name)
- : OperationName(Name), LastPrintAmount(0) {
- sys::SetupAlarm(1);
-}
-
-SlowOperationInformer::~SlowOperationInformer() {
- sys::TerminateAlarm();
- if (LastPrintAmount) {
- // If we have printed something, make _sure_ we print the 100% amount, and
- // also print a newline.
- outs() << std::string(LastPrintAmount, '\b') << "Progress "
- << OperationName << ": 100% \n";
- }
-}
-
-/// progress - Clients should periodically call this method when they are in
-/// an exception-safe state. The Amount variable should indicate how far
-/// along the operation is, given in 1/10ths of a percent (in other words,
-/// Amount should range from 0 to 1000).
-bool SlowOperationInformer::progress(unsigned Amount) {
- int status = sys::AlarmStatus();
- if (status == -1) {
- outs() << "\n";
- LastPrintAmount = 0;
- return true;
- }
-
- // If we haven't spent enough time in this operation to warrant displaying the
- // progress bar, don't do so yet.
- if (status == 0)
- return false;
-
- // Delete whatever we printed last time.
- std::string ToPrint = std::string(LastPrintAmount, '\b');
-
- std::ostringstream OS;
- OS << "Progress " << OperationName << ": " << Amount/10;
- if (unsigned Rem = Amount % 10)
- OS << "." << Rem << "%";
- else
- OS << "% ";
-
- LastPrintAmount = OS.str().size();
- outs() << ToPrint+OS.str();
- outs().flush();
- return false;
-}
diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp
index 2e17af864155a..a89f14957635e 100644
--- a/lib/Support/SmallVector.cpp
+++ b/lib/Support/SmallVector.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
/// on POD-like datatypes and is out of line to reduce code duplication.
void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) {
size_t CurSizeBytes = size_in_bytes();
- size_t NewCapacityInBytes = 2 * capacity_in_bytes();
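+ // Add TSize so the capacity is guaranteed to grow even when it is
+ // currently zero.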
+ size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow.
if (NewCapacityInBytes < MinSizeInBytes)
NewCapacityInBytes = MinSizeInBytes;
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 7d5f65af28422..e32ab74a2d4c4 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -44,7 +44,7 @@ Enabled("stats", cl::desc("Enable statistics output from program"));
namespace {
/// StatisticInfo - This class is used in a ManagedStatic so that it is created
-/// on demand (when the first statistic is bumped) and destroyed only when
+/// on demand (when the first statistic is bumped) and destroyed only when
/// llvm_shutdown is called. We print statistics from the destructor.
class StatisticInfo {
std::vector<const Statistic*> Stats;
@@ -52,7 +52,7 @@ class StatisticInfo {
friend void llvm::PrintStatistics(raw_ostream &OS);
public:
~StatisticInfo();
-
+
void addStatistic(const Statistic *S) {
Stats.push_back(S);
}
@@ -71,7 +71,7 @@ void Statistic::RegisterStatistic() {
if (!Initialized) {
if (Enabled)
StatInfo->addStatistic(this);
-
+
sys::MemoryFence();
// Remember we have been registered.
Initialized = true;
@@ -84,7 +84,7 @@ struct NameCompare {
bool operator()(const Statistic *LHS, const Statistic *RHS) const {
int Cmp = std::strcmp(LHS->getName(), RHS->getName());
if (Cmp != 0) return Cmp < 0;
-
+
// Secondary key is the description.
return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
}
@@ -112,7 +112,7 @@ void llvm::PrintStatistics(raw_ostream &OS) {
MaxNameLen = std::max(MaxNameLen,
(unsigned)std::strlen(Stats.Stats[i]->getName()));
}
-
+
// Sort the fields by name.
std::stable_sort(Stats.Stats.begin(), Stats.Stats.end(), NameCompare());
@@ -120,7 +120,7 @@ void llvm::PrintStatistics(raw_ostream &OS) {
OS << "===" << std::string(73, '-') << "===\n"
<< " ... Statistics Collected ...\n"
<< "===" << std::string(73, '-') << "===\n\n";
-
+
// Print all of the statistics.
for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
std::string CountStr = utostr(Stats.Stats[i]->getValue());
@@ -129,7 +129,7 @@ void llvm::PrintStatistics(raw_ostream &OS) {
<< std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ')
<< " - " << Stats.Stats[i]->getDesc() << "\n";
}
-
+
OS << '\n'; // Flush the output stream.
OS.flush();
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index ca0f518a88b62..46f26b242aac3 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -9,6 +9,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/APInt.h"
+#include <bitset>
+#include <climits> // for CHAR_BIT, used by find_first_of below
using namespace llvm;
@@ -30,14 +31,14 @@ static bool ascii_isdigit(char x) {
/// compare_lower - Compare strings, ignoring case.
int StringRef::compare_lower(StringRef RHS) const {
for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
- char LHC = ascii_tolower(Data[I]);
- char RHC = ascii_tolower(RHS.Data[I]);
+ unsigned char LHC = ascii_tolower(Data[I]);
+ unsigned char RHC = ascii_tolower(RHS.Data[I]);
if (LHC != RHC)
return LHC < RHC ? -1 : 1;
}
if (Length == RHS.Length)
- return 0;
+ return 0;
return Length < RHS.Length ? -1 : 1;
}
@@ -58,10 +59,10 @@ int StringRef::compare_numeric(StringRef RHS) const {
break;
}
}
- return Data[I] < RHS.Data[I] ? -1 : 1;
+ return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
}
if (Length == RHS.Length)
- return 0;
+ return 0;
return Length < RHS.Length ? -1 : 1;
}
@@ -153,11 +154,15 @@ size_t StringRef::rfind(StringRef Str) const {
/// find_first_of - Find the first character in the string that is in \arg
/// Chars, or npos if not found.
///
-/// Note: O(size() * Chars.size())
+/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_of(StringRef Chars,
size_t From) const {
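+ // Build a 256-entry membership table up front so the scan below costs one
+ // bit test per character instead of a search of Chars.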
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
for (size_type i = min(From, Length), e = Length; i != e; ++i)
- if (Chars.find(Data[i]) != npos)
+ if (CharBits.test((unsigned char)Data[i]))
return i;
return npos;
}
@@ -174,11 +179,15 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
/// find_first_not_of - Find the first character in the string that is not
/// in the string \arg Chars, or npos if not found.
///
-/// Note: O(size() * Chars.size())
+/// Note: O(size() + Chars.size())
StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
size_t From) const {
+ std::bitset<1 << CHAR_BIT> CharBits;
+ for (size_type i = 0; i != Chars.size(); ++i)
+ CharBits.set((unsigned char)Chars[i]);
+
for (size_type i = min(From, Length), e = Length; i != e; ++i)
- if (Chars.find(Data[i]) == npos)
+ if (!CharBits.test((unsigned char)Data[i]))
return i;
return npos;
}
diff --git a/lib/Support/SystemUtils.cpp b/lib/Support/SystemUtils.cpp
index 299032f187156..c8b260c2e3dd9 100644
--- a/lib/Support/SystemUtils.cpp
+++ b/lib/Support/SystemUtils.cpp
@@ -49,6 +49,16 @@ sys::Path llvm::FindExecutable(const std::string &ExeName,
Result.appendComponent(ExeName);
if (Result.canExecute())
return Result;
+ // If the path is absolute (and it usually is), call FindProgramByName to
+ // allow it to try platform-specific logic, such as appending a .exe suffix
+ // on Windows. Don't do this if we somehow have a relative path, because
+ // we don't want to go searching the PATH and accidentally find an unrelated
+ // version of the program.
+ if (Result.isAbsolute()) {
+ Result = sys::Program::FindProgramByName(Result.str());
+ if (!Result.empty())
+ return Result;
+ }
}
return sys::Path();
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 6a70449b56dc7..3a95b65e69000 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -221,121 +221,238 @@ const char *Triple::getArchNameForAssembler() {
//
-void Triple::Parse() const {
- assert(!isInitialized() && "Invalid parse call.");
-
- StringRef ArchName = getArchName();
- StringRef VendorName = getVendorName();
- StringRef OSName = getOSName();
-
+Triple::ArchType Triple::ParseArch(StringRef ArchName) {
if (ArchName.size() == 4 && ArchName[0] == 'i' &&
ArchName[2] == '8' && ArchName[3] == '6' &&
ArchName[1] - '3' < 6) // i[3-9]86
- Arch = x86;
+ return x86;
else if (ArchName == "amd64" || ArchName == "x86_64")
- Arch = x86_64;
+ return x86_64;
else if (ArchName == "bfin")
- Arch = bfin;
+ return bfin;
else if (ArchName == "pic16")
- Arch = pic16;
+ return pic16;
else if (ArchName == "powerpc")
- Arch = ppc;
+ return ppc;
else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
- Arch = ppc64;
+ return ppc64;
else if (ArchName == "mblaze")
- Arch = mblaze;
+ return mblaze;
else if (ArchName == "arm" ||
ArchName.startswith("armv") ||
ArchName == "xscale")
- Arch = arm;
+ return arm;
else if (ArchName == "thumb" ||
ArchName.startswith("thumbv"))
- Arch = thumb;
+ return thumb;
else if (ArchName.startswith("alpha"))
- Arch = alpha;
+ return alpha;
else if (ArchName == "spu" || ArchName == "cellspu")
- Arch = cellspu;
+ return cellspu;
else if (ArchName == "msp430")
- Arch = msp430;
+ return msp430;
else if (ArchName == "mips" || ArchName == "mipsallegrex")
- Arch = mips;
+ return mips;
else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
ArchName == "psp")
- Arch = mipsel;
+ return mipsel;
else if (ArchName == "sparc")
- Arch = sparc;
+ return sparc;
else if (ArchName == "sparcv9")
- Arch = sparcv9;
+ return sparcv9;
else if (ArchName == "s390x")
- Arch = systemz;
+ return systemz;
else if (ArchName == "tce")
- Arch = tce;
+ return tce;
else if (ArchName == "xcore")
- Arch = xcore;
+ return xcore;
else
- Arch = UnknownArch;
-
-
- // Handle some exceptional cases where the OS / environment components are
- // stuck into the vendor field.
- if (StringRef(getTriple()).count('-') == 1) {
- StringRef VendorName = getVendorName();
-
- if (VendorName.startswith("mingw32")) { // 'i386-mingw32', etc.
- Vendor = PC;
- OS = MinGW32;
- return;
- }
-
- // arm-elf is another example, but we don't currently parse anything about
- // the environment.
- }
+ return UnknownArch;
+}
+Triple::VendorType Triple::ParseVendor(StringRef VendorName) {
if (VendorName == "apple")
- Vendor = Apple;
+ return Apple;
else if (VendorName == "pc")
- Vendor = PC;
+ return PC;
else
- Vendor = UnknownVendor;
+ return UnknownVendor;
+}
+Triple::OSType Triple::ParseOS(StringRef OSName) {
if (OSName.startswith("auroraux"))
- OS = AuroraUX;
+ return AuroraUX;
else if (OSName.startswith("cygwin"))
- OS = Cygwin;
+ return Cygwin;
else if (OSName.startswith("darwin"))
- OS = Darwin;
+ return Darwin;
else if (OSName.startswith("dragonfly"))
- OS = DragonFly;
+ return DragonFly;
else if (OSName.startswith("freebsd"))
- OS = FreeBSD;
+ return FreeBSD;
else if (OSName.startswith("linux"))
- OS = Linux;
+ return Linux;
else if (OSName.startswith("lv2"))
- OS = Lv2;
+ return Lv2;
else if (OSName.startswith("mingw32"))
- OS = MinGW32;
+ return MinGW32;
else if (OSName.startswith("mingw64"))
- OS = MinGW64;
+ return MinGW64;
else if (OSName.startswith("netbsd"))
- OS = NetBSD;
+ return NetBSD;
else if (OSName.startswith("openbsd"))
- OS = OpenBSD;
+ return OpenBSD;
else if (OSName.startswith("psp"))
- OS = Psp;
+ return Psp;
else if (OSName.startswith("solaris"))
- OS = Solaris;
+ return Solaris;
else if (OSName.startswith("win32"))
- OS = Win32;
+ return Win32;
else if (OSName.startswith("haiku"))
- OS = Haiku;
+ return Haiku;
else if (OSName.startswith("minix"))
- OS = Minix;
+ return Minix;
else
- OS = UnknownOS;
+ return UnknownOS;
+}
+
+void Triple::Parse() const {
+ assert(!isInitialized() && "Invalid parse call.");
+
+ Arch = ParseArch(getArchName());
+ Vendor = ParseVendor(getVendorName());
+ OS = ParseOS(getOSName());
assert(isInitialized() && "Failed to initialize!");
}
+std::string Triple::normalize(StringRef Str) {
+ // Parse into components.
+ SmallVector<StringRef, 4> Components;
+ for (size_t First = 0, Last = 0; Last != StringRef::npos; First = Last + 1) {
+ Last = Str.find('-', First);
+ Components.push_back(Str.slice(First, Last));
+ }
+
+ // If the first component corresponds to a known architecture, preferentially
+ // use it for the architecture. If the second component corresponds to a
+ // known vendor, preferentially use it for the vendor, etc. This avoids silly
+ // component movement when a component parses as (eg) both a valid arch and a
+ // valid os.
+ ArchType Arch = UnknownArch;
+ if (Components.size() > 0)
+ Arch = ParseArch(Components[0]);
+ VendorType Vendor = UnknownVendor;
+ if (Components.size() > 1)
+ Vendor = ParseVendor(Components[1]);
+ OSType OS = UnknownOS;
+ if (Components.size() > 2)
+ OS = ParseOS(Components[2]);
+
+ // Note which components are already in their final position. These will not
+ // be moved.
+ bool Found[3];
+ Found[0] = Arch != UnknownArch;
+ Found[1] = Vendor != UnknownVendor;
+ Found[2] = OS != UnknownOS;
+
+ // If they are not there already, permute the components into their canonical
+ // positions by seeing if they parse as a valid architecture, and if so moving
+ // the component to the architecture position etc.
+ for (unsigned Pos = 0; Pos != 3; ++Pos) {
+ if (Found[Pos])
+ continue; // Already in the canonical position.
+
+ for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
+ // Do not reparse any components that already matched.
+ if (Idx < 3 && Found[Idx])
+ continue;
+
+ // Does this component parse as valid for the target position?
+ bool Valid = false;
+ StringRef Comp = Components[Idx];
+ switch (Pos) {
+ default:
+ assert(false && "unexpected component type!");
+ case 0:
+ Arch = ParseArch(Comp);
+ Valid = Arch != UnknownArch;
+ break;
+ case 1:
+ Vendor = ParseVendor(Comp);
+ Valid = Vendor != UnknownVendor;
+ break;
+ case 2:
+ OS = ParseOS(Comp);
+ Valid = OS != UnknownOS;
+ break;
+ }
+ if (!Valid)
+ continue; // Nope, try the next component.
+
+ // Move the component to the target position, pushing any non-fixed
+ // components that are in the way to the right. This tends to give
+ // good results in the common cases of a forgotten vendor component
+ // or a wrongly positioned environment.
+ if (Pos < Idx) {
+ // Insert left, pushing the existing components to the right. For
+ // example, a-b-i386 -> i386-a-b when moving i386 to the front.
+ StringRef CurrentComponent(""); // The empty component.
+ // Replace the component we are moving with an empty component.
+ std::swap(CurrentComponent, Components[Idx]);
+ // Insert the component being moved at Pos, displacing any existing
+ // components to the right.
+ for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
+ // Skip over any fixed components.
+ while (i < 3 && Found[i]) ++i;
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ }
+ } else if (Pos > Idx) {
+ // Push right by inserting empty components until the component at Idx
+ // reaches the target position Pos. For example, pc-a -> -pc-a when
+ // moving pc to the second position.
+ do {
+ // Insert one empty component at Idx.
+ StringRef CurrentComponent(""); // The empty component.
+ for (unsigned i = Idx; i < Components.size(); ++i) {
+ // Skip over any fixed components.
+ while (i < 3 && Found[i]) ++i;
+ // Place the component at the new position, getting the component
+ // that was at this position - it will be moved right.
+ std::swap(CurrentComponent, Components[i]);
+ // If it was placed on top of an empty component then we are done.
+ if (CurrentComponent.empty())
+ break;
+ }
+ // The last component was pushed off the end - append it.
+ if (!CurrentComponent.empty())
+ Components.push_back(CurrentComponent);
+
+ // Advance Idx to the component's new position.
+ while (++Idx < 3 && Found[Idx]) {}
+ } while (Idx < Pos); // Add more until the final position is reached.
+ }
+ assert(Pos < Components.size() && Components[Pos] == Comp &&
+ "Component moved wrong!");
+ Found[Pos] = true;
+ break;
+ }
+ }
+
+ // Special case logic goes here. At this point Arch, Vendor and OS have the
+ // correct values for the computed components.
+
+ // Stick the corrected components back together to form the normalized string.
+ std::string Normalized;
+ for (unsigned i = 0, e = Components.size(); i != e; ++i) {
+ if (i) Normalized += '-';
+ Normalized += Components[i];
+ }
+ return Normalized;
+}
+
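As a usage sketch grounded in the movement examples in the comments above (illustrative values, not an exhaustive spec):

llvm::Triple::normalize("x86_64-apple-darwin10"); // unchanged: already canonical
llvm::Triple::normalize("a-b-i386");              // "i386-a-b": arch pulled to front
llvm::Triple::normalize("pc-a");                  // "-pc-a": vendor pushed right
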
StringRef Triple::getArchName() const {
return StringRef(Data).split('-').first; // Isolate first component
}
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 8054ae63688c9..dba46df362566 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -19,6 +19,7 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/System/Signals.h"
#include "llvm/ADT/STLExtras.h"
#include <cctype>
#include <cerrno>
@@ -56,13 +57,6 @@ raw_ostream::~raw_ostream() {
if (BufferMode == InternalBuffer)
delete [] OutBufStart;
-
- // If there are any pending errors, report them now. Clients wishing
- // to avoid report_fatal_error calls should check for errors with
- // has_error() and clear the error flag with clear_error() before
- // destructing raw_ostream objects which may have errors.
- if (Error)
- report_fatal_error("IO failure on output stream.");
}
// An out of line virtual method to provide a home for the class vtable.
@@ -143,9 +137,10 @@ raw_ostream &raw_ostream::operator<<(unsigned long long N) {
}
raw_ostream &raw_ostream::operator<<(long long N) {
- if (N < 0) {
+ if (N < 0) {
*this << '-';
- N = -N;
+ // Avoid undefined behavior on INT64_MIN with a cast.
+ N = -(unsigned long long)N;
}
return this->operator<<(static_cast<unsigned long long>(N));
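
The cast avoids signed-overflow undefined behavior: INT64_MIN has no representable negation in long long, while unsigned negation is defined to wrap. A standalone sketch:

#include <cstdint>
#include <cstdio>

int main() {
  long long N = INT64_MIN;
  // N = -N;  // undefined behavior: no positive counterpart in long long
  unsigned long long U = -(unsigned long long)N;  // wraps, well-defined
  std::printf("%llu\n", U);  // prints 9223372036854775808
  return 0;
}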
@@ -368,7 +363,7 @@ void format_object_base::home() {
/// stream should be immediately destroyed; the string will be empty
/// if no error occurred.
raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
- unsigned Flags) : pos(0) {
+ unsigned Flags) : Error(false), pos(0) {
assert(Filename != 0 && "Filename is null");
// Verify that we don't have both "append" and "excl".
assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
@@ -376,14 +371,17 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
ErrorInfo.clear();
- // Handle "-" as stdout.
+ // Handle "-" as stdout. Note that when we do this, we consider ourself
+ // the owner of stdout. This means that we can do things like close the
+ // file descriptor when we're done and set the "binary" flag globally.
if (Filename[0] == '-' && Filename[1] == 0) {
FD = STDOUT_FILENO;
// If user requested binary then put stdout into binary mode if
// possible.
if (Flags & F_Binary)
sys::Program::ChangeStdoutToBinary();
- ShouldClose = false;
+ // Close stdout when we're done, to detect any output errors.
+ ShouldClose = true;
return;
}
@@ -413,14 +411,22 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
}
raw_fd_ostream::~raw_fd_ostream() {
- if (FD < 0) return;
- flush();
- if (ShouldClose)
- while (::close(FD) != 0)
- if (errno != EINTR) {
- error_detected();
- break;
- }
+ if (FD >= 0) {
+ flush();
+ if (ShouldClose)
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
+ }
+
+ // If there are any pending errors, report them now. Clients wishing
+ // to avoid report_fatal_error calls should check for errors with
+ // has_error() and clear the error flag with clear_error() before
+ // destructing raw_ostream objects which may have errors.
+ if (has_error())
+ report_fatal_error("IO failure on output stream.");
}
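
The client-side half of this protocol, sketched for a caller that prefers to handle write failures itself rather than die in the destructor:

void writeAndCheck(llvm::raw_fd_ostream &OS) {
  OS << "data\n";
  OS.flush();
  if (OS.has_error()) {
    // Handle the failure here, then clear the flag so the destructor
    // does not call report_fatal_error.
    OS.clear_error();
  }
}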
@@ -534,30 +540,24 @@ bool raw_fd_ostream::is_displayed() const {
}
//===----------------------------------------------------------------------===//
-// raw_stdout/err_ostream
+// outs(), errs(), nulls()
//===----------------------------------------------------------------------===//
-// Set buffer settings to model stdout and stderr behavior.
-// Set standard error to be unbuffered by default.
-raw_stdout_ostream::raw_stdout_ostream():raw_fd_ostream(STDOUT_FILENO, false) {}
-raw_stderr_ostream::raw_stderr_ostream():raw_fd_ostream(STDERR_FILENO, false,
- true) {}
-
-// An out of line virtual method to provide a home for the class vtable.
-void raw_stdout_ostream::handle() {}
-void raw_stderr_ostream::handle() {}
-
/// outs() - This returns a reference to a raw_ostream for standard output.
/// Use it like: outs() << "foo" << "bar";
raw_ostream &llvm::outs() {
- static raw_stdout_ostream S;
+ // Set buffer settings to model stdout behavior.
+  // Close the file descriptor when the program exits, forcing error
+ // detection. If you don't want this behavior, don't use outs().
+ static raw_fd_ostream S(STDOUT_FILENO, true);
return S;
}
/// errs() - This returns a reference to a raw_ostream for standard error.
/// Use it like: errs() << "foo" << "bar";
raw_ostream &llvm::errs() {
- static raw_stderr_ostream S;
+ // Set standard error to be unbuffered by default.
+ static raw_fd_ostream S(STDERR_FILENO, false, true);
return S;
}
@@ -665,3 +665,34 @@ void raw_null_ostream::write_impl(const char *Ptr, size_t Size) {
uint64_t raw_null_ostream::current_pos() const {
return 0;
}
+
+//===----------------------------------------------------------------------===//
+// tool_output_file
+//===----------------------------------------------------------------------===//
+
+tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename)
+ : Filename(filename), Keep(false) {
+ // Arrange for the file to be deleted if the process is killed.
+ if (Filename != "-")
+ sys::RemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::CleanupInstaller::~CleanupInstaller() {
+ // Delete the file if the client hasn't told us not to.
+ if (!Keep && Filename != "-")
+ sys::Path(Filename).eraseFromDisk();
+
+ // Ok, the file is successfully written and closed, or deleted. There's no
+ // further need to clean it up on signals.
+ if (Filename != "-")
+ sys::DontRemoveFileOnSignal(sys::Path(Filename));
+}
+
+tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo,
+ unsigned Flags)
+ : Installer(filename),
+ OS(filename, ErrorInfo, Flags) {
+ // If open fails, no cleanup is needed.
+ if (!ErrorInfo.empty())
+ Installer.Keep = true;
+}
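
A hedged usage sketch; it assumes the accompanying header exposes an os() accessor for the stream and a keep() method that sets Installer.Keep, which the constructor's use of the flag suggests:

int writeOutput() {
  std::string ErrorInfo;
  llvm::tool_output_file Out("out.bc", ErrorInfo, /*Flags=*/0);
  if (!ErrorInfo.empty()) {
    llvm::errs() << ErrorInfo << '\n';
    return 1;  // open failed; nothing was created that needs keeping
  }
  Out.os() << "contents\n";
  Out.keep();  // success: suppress deletion in ~CleanupInstaller
  return 0;
}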
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
index 6f6890c06c499..660db492d6b9a 100644
--- a/lib/System/DynamicLibrary.cpp
+++ b/lib/System/DynamicLibrary.cpp
@@ -70,6 +70,12 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
if (ErrMsg) *ErrMsg = dlerror();
return true;
}
+#ifdef __CYGWIN__
+  // On Cygwin, symbols in the main program are only visible through the
+  // pseudo-handle RTLD_DEFAULT, not through the handle that
+  // dlopen(NULL, RTLD_GLOBAL) returns, so substitute it here.
+ if (Filename == NULL)
+ H = RTLD_DEFAULT;
+#endif
if (OpenedHandles == 0)
OpenedHandles = new std::vector<void *>();
OpenedHandles->push_back(H);
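
A minimal sketch of the distinction on the dlfcn side: lookups through the pseudo-handle RTLD_DEFAULT search the global scope including the main program, which is what the substitution above relies on (hedged; Cygwin-specific behavior as described in the comment):

#include <dlfcn.h>

void *lookupInMainProgram(const char *Sym) {
  // On Cygwin, dlsym on the dlopen(NULL, RTLD_GLOBAL) handle misses
  // symbols from the main executable; RTLD_DEFAULT finds them.
  return dlsym(RTLD_DEFAULT, Sym);
}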
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
index 1235257b27e20..4445c667d86e9 100644
--- a/lib/System/Path.cpp
+++ b/lib/System/Path.cpp
@@ -61,7 +61,7 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
if (memcmp(magic,"!<arch>\n",8) == 0)
return Archive_FileType;
break;
-
+
case '\177':
if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
if (length >= 18 && magic[17] == 0)
@@ -76,11 +76,11 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
break;
case 0xCA:
- if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
+ if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
magic[3] == char(0xBE)) {
- // This is complicated by an overlap with Java class files.
+ // This is complicated by an overlap with Java class files.
// See the Mach-O section in /usr/share/file/magic for details.
- if (length >= 8 && magic[7] < 43)
+ if (length >= 8 && magic[7] < 43)
// FIXME: Universal Binary of any type.
return Mach_O_DynamicallyLinkedSharedLib_FileType;
}
@@ -89,18 +89,18 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
case 0xFE:
case 0xCE: {
uint16_t type = 0;
- if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
+ if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
magic[2] == char(0xFA) && magic[3] == char(0xCE)) {
/* Native endian */
if (length >= 16) type = magic[14] << 8 | magic[15];
- } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) &&
+ } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) &&
magic[2] == char(0xED) && magic[3] == char(0xFE)) {
/* Reverse endian */
if (length >= 14) type = magic[13] << 8 | magic[12];
}
switch (type) {
- default: break;
- case 1: return Mach_O_Object_FileType;
+ default: break;
+ case 1: return Mach_O_Object_FileType;
case 2: return Mach_O_Executable_FileType;
case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType;
case 4: return Mach_O_Core_FileType;
@@ -219,38 +219,38 @@ static StringRef getDirnameCharSep(StringRef path, const char *Sep) {
"Sep must be a 1-character string literal.");
if (path.empty())
return ".";
-
+
// If the path is all slashes, return a single slash.
// Otherwise, remove all trailing slashes.
-
+
signed pos = static_cast<signed>(path.size()) - 1;
-
+
while (pos >= 0 && path[pos] == Sep[0])
--pos;
-
+
if (pos < 0)
return path[0] == Sep[0] ? Sep : ".";
-
+
// Any slashes left?
signed i = 0;
-
+
while (i < pos && path[i] != Sep[0])
++i;
-
+
if (i == pos) // No slashes? Return "."
return ".";
-
- // There is at least one slash left. Remove all trailing non-slashes.
+
+ // There is at least one slash left. Remove all trailing non-slashes.
while (pos >= 0 && path[pos] != Sep[0])
--pos;
-
+
// Remove any trailing slashes.
while (pos >= 0 && path[pos] == Sep[0])
--pos;
-
+
if (pos < 0)
return path[0] == Sep[0] ? Sep : ".";
-
+
return path.substr(0, pos+1);
}
diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp
index 5faf220eb9168..deb04709d829c 100644
--- a/lib/System/RWMutex.cpp
+++ b/lib/System/RWMutex.cpp
@@ -71,23 +71,9 @@ RWMutexImpl::RWMutexImpl()
bzero(rwlock, sizeof(pthread_rwlock_t));
#endif
- pthread_rwlockattr_t attr;
-
- // Initialize the rwlock attributes
- int errorcode = pthread_rwlockattr_init(&attr);
- assert(errorcode == 0);
-
-#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
- // Make it a process local rwlock
- errorcode = pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
-#endif
-
// Initialize the rwlock
- errorcode = pthread_rwlock_init(rwlock, &attr);
- assert(errorcode == 0);
-
- // Destroy the attributes
- errorcode = pthread_rwlockattr_destroy(&attr);
+ int errorcode = pthread_rwlock_init(rwlock, NULL);
+ (void)errorcode;
assert(errorcode == 0);
// Assign the data member
diff --git a/lib/System/ThreadLocal.cpp b/lib/System/ThreadLocal.cpp
index e7054b5281471..f6a55a1c0b9b1 100644
--- a/lib/System/ThreadLocal.cpp
+++ b/lib/System/ThreadLocal.cpp
@@ -27,6 +27,7 @@ ThreadLocalImpl::ThreadLocalImpl() { }
ThreadLocalImpl::~ThreadLocalImpl() { }
void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { data = 0; }
}
#else
@@ -67,6 +68,10 @@ const void* ThreadLocalImpl::getInstance() {
return pthread_getspecific(*key);
}
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
}
#elif defined(LLVM_ON_UNIX)
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index bc104a32a3aea..47e4d1ac3c6b5 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -276,20 +276,20 @@ Path::GetCurrentDirectory() {
char pathname[MAXPATHLEN];
if (!getcwd(pathname,MAXPATHLEN)) {
assert (false && "Could not query current working directory.");
- return Path("");
+ return Path();
}
return Path(pathname);
}
-#ifdef __FreeBSD__
+#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix)
static int
test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
const char *dir, const char *bin)
{
struct stat sb;
- snprintf(buf, PATH_MAX, "%s//%s", dir, bin);
+ snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
if (realpath(buf, ret) == NULL)
return (1);
if (stat(buf, &sb) != 0)
@@ -334,7 +334,7 @@ getprogpath(char ret[PATH_MAX], const char *bin)
free(pv);
return (NULL);
}
-#endif // __FreeBSD__
+#endif // __FreeBSD__ || __NetBSD__ || __minix
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
@@ -350,7 +350,7 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
if (realpath(exe_path, link_path))
return Path(std::string(link_path));
}
-#elif defined(__FreeBSD__)
+#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__minix)
char exe_path[PATH_MAX];
if (getprogpath(exe_path, argv0) != NULL)
@@ -408,7 +408,7 @@ Path::getSuffix() const {
std::string::size_type dot = path.rfind('.');
if (dot == std::string::npos || dot < slash)
- return StringRef("");
+ return StringRef();
else
return StringRef(path).substr(dot + 1);
}
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
index 1e74647e5fdcb..7b7c43efc7864 100644
--- a/lib/System/Unix/Signals.inc
+++ b/lib/System/Unix/Signals.inc
@@ -182,6 +182,16 @@ bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
return false;
}
+// DontRemoveFileOnSignal - The public API
+void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ SignalsMutex.acquire();
+ std::vector<sys::Path>::reverse_iterator I =
+ std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename);
+ if (I != FilesToRemove.rend())
+ FilesToRemove.erase(I.base()-1);
+ SignalsMutex.release();
+}
+
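The erase relies on the standard reverse_iterator conversion: for a reverse_iterator I, I.base() points one element past the match, so I.base() - 1 is the forward iterator for the element itself. A standalone sketch of the idiom:

#include <algorithm>
#include <vector>

// Erase the last occurrence of X in V, if any.
void eraseLast(std::vector<int> &V, int X) {
  std::vector<int>::reverse_iterator I = std::find(V.rbegin(), V.rend(), X);
  if (I != V.rend())
    V.erase(I.base() - 1);  // &*I == &*(I.base() - 1)
}
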
/// AddSignalHandler - Add a function to be called when a signal is delivered
/// to the process. The handler can have a cookie passed to it to identify
/// what instance of the handler it is.
@@ -253,3 +263,37 @@ void llvm::sys::PrintStackTraceOnErrorSignal() {
AddSignalHandler(PrintStackTrace, 0);
}
+
+/***/
+
+// On Darwin, raise sends a signal to the main thread instead of the current
+// thread. This has the unfortunate effect that assert() and abort() will end up
+// bypassing our crash recovery attempts. We work around this for anything in
+// the same linkage unit by just defining our own versions of the assert handler
+// and abort.
+
+#ifdef __APPLE__
+
+void __assert_rtn(const char *func,
+ const char *file,
+ int line,
+ const char *expr) {
+ if (func)
+ fprintf(stderr, "Assertion failed: (%s), function %s, file %s, line %d.\n",
+ expr, func, file, line);
+ else
+ fprintf(stderr, "Assertion failed: (%s), file %s, line %d.\n",
+ expr, file, line);
+ abort();
+}
+
+#include <signal.h>
+#include <pthread.h>
+
+void abort() {
+ pthread_kill(pthread_self(), SIGABRT);
+ usleep(1000);
+ __builtin_trap();
+}
+
+#endif
diff --git a/lib/System/Unix/ThreadLocal.inc b/lib/System/Unix/ThreadLocal.inc
index 83d554d3077c7..6769520a6fb66 100644
--- a/lib/System/Unix/ThreadLocal.inc
+++ b/lib/System/Unix/ThreadLocal.inc
@@ -22,4 +22,5 @@ ThreadLocalImpl::ThreadLocalImpl() { }
ThreadLocalImpl::~ThreadLocalImpl() { }
void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { setInstance(0); }
}
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
index 379527d4ebf24..4a6dbd3ddf299 100644
--- a/lib/System/Win32/Path.inc
+++ b/lib/System/Win32/Path.inc
@@ -400,8 +400,10 @@ PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
for (unsigned i = 0; i < path.length(); ++i)
status.uniqueID += path[i];
- __int64 ft = *reinterpret_cast<__int64*>(&fi.ftLastWriteTime);
- status.modTime.fromWin32Time(ft);
+ ULARGE_INTEGER ui;
+ ui.LowPart = fi.ftLastWriteTime.dwLowDateTime;
+ ui.HighPart = fi.ftLastWriteTime.dwHighDateTime;
+ status.modTime.fromWin32Time(ui.QuadPart);
status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
fsIsValid = true;
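
FILETIME is only guaranteed 4-byte alignment, so reinterpreting it as a 64-bit integer (as the old code did) breaks alignment and aliasing rules; copying through ULARGE_INTEGER is the documented pattern. Sketched as a helper:

#include <windows.h>

unsigned long long fileTimeToU64(const FILETIME &FT) {
  ULARGE_INTEGER UI;
  UI.LowPart = FT.dwLowDateTime;
  UI.HighPart = FT.dwHighDateTime;
  return UI.QuadPart;  // 100ns intervals since January 1, 1601
}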
@@ -720,7 +722,7 @@ Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
assert(len < 1024 && "Request for magic string too long");
- char* buf = (char*) alloca(1 + len);
+ char* buf = reinterpret_cast<char*>(alloca(len));
HANDLE h = CreateFile(path.c_str(),
GENERIC_READ,
@@ -739,8 +741,7 @@ bool Path::getMagicNumber(std::string& Magic, unsigned len) const {
if (!ret || nRead != len)
return false;
- buf[len] = '\0';
- Magic = buf;
+ Magic = std::string(buf, len);
return true;
}
@@ -777,8 +778,11 @@ Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const {
return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: ");
}
+ ULARGE_INTEGER ui;
+ ui.QuadPart = si.modTime.toWin32Time();
FILETIME ft;
- (uint64_t&)ft = si.modTime.toWin32Time();
+ ft.dwLowDateTime = ui.LowPart;
+ ft.dwHighDateTime = ui.HighPart;
BOOL ret = SetFileTime(h, NULL, &ft, &ft);
DWORD err = GetLastError();
CloseHandle(h);
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
index d6db71ba4f359..2498a26ea99c5 100644
--- a/lib/System/Win32/Signals.inc
+++ b/lib/System/Win32/Signals.inc
@@ -140,6 +140,20 @@ bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
return false;
}
+// DontRemoveFileOnSignal - The public API
+void sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+ if (FilesToRemove == NULL)
+ return;
+
+  // Remove the most recent registration of Filename, guarding the list
+  // with the same critical section RemoveFileOnSignal uses.
+  EnterCriticalSection(&CriticalSection);
+  std::vector<sys::Path>::reverse_iterator I =
+    std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+  if (I != FilesToRemove->rend())
+    FilesToRemove->erase(I.base()-1);
+  LeaveCriticalSection(&CriticalSection);
+}
+
/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
void sys::PrintStackTraceOnErrorSignal() {
diff --git a/lib/System/Win32/ThreadLocal.inc b/lib/System/Win32/ThreadLocal.inc
index c8f7840b00387..b8b933c4d29d9 100644
--- a/lib/System/Win32/ThreadLocal.inc
+++ b/lib/System/Win32/ThreadLocal.inc
@@ -46,4 +46,8 @@ void ThreadLocalImpl::setInstance(const void* d){
assert(errorcode != 0);
}
+void ThreadLocalImpl::removeInstance() {
+ setInstance(0);
+}
+
}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 14825a785649d..271ca44c2b69d 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -30,22 +30,22 @@ class formatted_raw_ostream;
namespace ARMCC {
// The CondCodes constants map directly to the 4-bit encoding of the
// condition field for predicated instructions.
- enum CondCodes {
- EQ,
- NE,
- HS,
- LO,
- MI,
- PL,
- VS,
- VC,
- HI,
- LS,
- GE,
- LT,
- GT,
- LE,
- AL
+ enum CondCodes { // Meaning (integer) Meaning (floating-point)
+ EQ, // Equal Equal
+ NE, // Not equal Not equal, or unordered
+ HS, // Carry set >, ==, or unordered
+ LO, // Carry clear Less than
+ MI, // Minus, negative Less than
+ PL, // Plus, positive or zero >, ==, or unordered
+ VS, // Overflow Unordered
+ VC, // No overflow Not unordered
+ HI, // Unsigned higher Greater than, or unordered
+ LS, // Unsigned lower or same Less than or equal
+ GE, // Greater than or equal Greater than or equal
+ LT, // Less than Less than, or unordered
+ GT, // Greater than Greater than
+ LE, // Less than or equal <, ==, or unordered
+ AL // Always (unconditional) Always (unconditional)
};
inline static CondCodes getOppositeCondition(CondCodes CC) {
@@ -90,6 +90,33 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
+namespace ARM_MB {
+ // The Memory Barrier Option constants map directly to the 4-bit encoding of
+ // the option field for memory barrier operations.
+ enum MemBOpt {
+ ST = 14,
+ ISH = 11,
+ ISHST = 10,
+ NSH = 7,
+ NSHST = 6,
+ OSH = 3,
+ OSHST = 2
+ };
+
+ inline static const char *MemBOptToString(unsigned val) {
+ switch (val) {
+    default: llvm_unreachable("Unknown memory barrier option");
+ case ST: return "st";
+ case ISH: return "ish";
+ case ISHST: return "ishst";
+ case NSH: return "nsh";
+ case NSHST: return "nshst";
+ case OSH: return "osh";
+ case OSHST: return "oshst";
+ }
+ }
+} // namespace ARM_MB
+
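For example, the inner-shareable option used by "dmb ish" round-trips through this table as (illustrative snippet inside the ARM backend):

unsigned Val = ARM_MB::ISH;                       // encodes as 11
const char *Name = ARM_MB::MemBOptToString(Val);  // "ish"
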
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -98,6 +125,7 @@ FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
FunctionPass *createARMConstantIslandPass();
FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index fa64d6c2a4b4d..d6a8f19724dc9 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -1,4 +1,4 @@
-//===- ARM.td - Describe the ARM Target Machine -----------------*- C++ -*-===//
+//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -20,20 +20,6 @@ include "llvm/Target/Target.td"
// ARM Subtarget features.
//
-def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
- "ARM v4T">;
-def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
- "ARM v5T">;
-def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
- "ARM v5TE, v5TEj, v5TExp">;
-def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
- "ARM v6">;
-def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
- "ARM v6t2">;
-def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
- "ARM v7A">;
-def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
- "ARM v7M">;
def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
"Enable VFP2 instructions">;
def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
@@ -42,14 +28,20 @@ def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
"Enable NEON instructions">;
def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
"Enable Thumb2 instructions">;
+def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
+ "Does not support ARM mode execution">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
-def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
+def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true",
+ "Has data barrier (dmb / dsb) instructions">;
def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
"FP compare + branch is slow">;
+def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true",
+ "Floating point unit supports single precision only">;
// Some processors have multiply-accumulate instructions that don't
// play nicely with other VFP instructions, and it's generally better
@@ -57,14 +49,41 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for
// others as well. We should do more benchmarking and confirm one way or
// the other.
-def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
- "Disable VFP MAC instructions">;
+def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
+ "Disable VFP MAC instructions">;
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations.
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
"true",
"Use NEON for single precision FP">;
+// Disable 32-bit to 16-bit narrowing for experimentation.
+def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true",
+ "Prefer 32-bit Thumb instrs">;
+
+
+// ARM architectures.
+def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
+ "ARM v4T">;
+def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
+ "ARM v5T">;
+def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
+ "ARM v5TE, v5TEj, v5TExp">;
+def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
+ "ARM v6">;
+def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M",
+ "ARM v6m",
+ [FeatureNoARM, FeatureDB]>;
+def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
+ "ARM v6t2",
+ [FeatureThumb2]>;
+def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
+ "ARM v7A",
+ [FeatureThumb2, FeatureNEON, FeatureDB]>;
+def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
+ "ARM v7M",
+ [FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -122,20 +141,23 @@ def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>;
+
// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries,
- [ArchV6T2, FeatureThumb2]>;
-def : Processor<"arm1156t2f-s", ARMV6Itineraries,
- [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>;
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
- FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>;
+ [ArchV7A, FeatureHasSlowVMLx,
+ FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
-def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
-def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
+ [ArchV7A, FeatureT2XtPk]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [ArchV7M]>;
+def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index 92a13f1d751ca..db481005b3a4c 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -458,6 +458,7 @@ namespace ARM_AM {
// IB - Increment before
// DA - Decrement after
// DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
static inline AMSubMode getAM4SubMode(unsigned Mode) {
return (AMSubMode)(Mode & 0x7);
@@ -477,14 +478,6 @@ namespace ARM_AM {
//
// The first operand is always a Reg. The second operand encodes the
// operation in bit 8 and the immediate in bits 0-7.
- //
- // This is also used for FP load/store multiple ops. The second operand
- // encodes the number of registers (or 2 times the number of registers
- // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the
- // following two sub-modes:
- //
- // IA - Increment after
- // DB - Decrement before
/// getAM5Opc - This function encodes the addrmode5 opc field.
static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
@@ -498,17 +491,6 @@ namespace ARM_AM {
return ((AM5Opc >> 8) & 1) ? sub : add;
}
- /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
- /// VSTM instructions.
- static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) {
- assert((SubMode == ia || SubMode == db) &&
- "Illegal addressing mode 5 sub-mode!");
- return ((int)SubMode << 8) | Offset;
- }
- static inline AMSubMode getAM5SubMode(unsigned AM5Opc) {
- return (AMSubMode)((AM5Opc >> 8) & 0x7);
- }
-
//===--------------------------------------------------------------------===//
// Addressing Mode #6
//===--------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 946f4744f5bbc..6cfd5961149fd 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -17,7 +17,7 @@
#include "ARMBuildAttrs.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
-#include "ARMInstPrinter.h"
+#include "AsmPrinter/ARMInstPrinter.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMMCInstLower.h"
#include "ARMTargetMachine.h"
@@ -47,6 +47,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
@@ -56,6 +57,15 @@ static cl::opt<bool>
EnableMCInst("enable-arm-mcinst-printer", cl::Hidden,
cl::desc("enable experimental asmprinter gunk in the arm backend"));
+namespace llvm {
+ namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+ }
+}
+
namespace {
class ARMAsmPrinter : public AsmPrinter {
@@ -80,9 +90,9 @@ namespace {
virtual const char *getPassName() const {
return "ARM Assembly Printer";
}
-
+
void printInstructionThroughMCStreamer(const MachineInstr *MI);
-
+
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
const char *Modifier = 0);
@@ -110,8 +120,12 @@ namespace {
void printAddrModePCOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O,
const char *Modifier = 0);
- void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum,
- raw_ostream &O);
+ void printBitfieldInvMaskImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ void printMemBOption(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
+ void printShiftImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O);
@@ -190,12 +204,32 @@ namespace {
virtual void EmitInstruction(const MachineInstr *MI);
bool runOnMachineFunction(MachineFunction &F);
-
+
virtual void EmitConstantPool() {} // we emit constant pools customly!
virtual void EmitFunctionEntryLabel();
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
const MachineBasicBlock *MBB) const;
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
@@ -208,7 +242,7 @@ namespace {
EmitMachineConstantPoolValue(MCPV, OS);
OutStreamer.EmitRawText(OS.str());
}
-
+
void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV,
raw_ostream &O) {
switch (TM.getTargetData()->getTypeAllocSize(MCPV->getType())) {
@@ -234,7 +268,7 @@ namespace {
// FIXME: Remove this when Darwin transition to @GOT like syntax.
MCSymbol *Sym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
O << *Sym;
-
+
MachineModuleInfoMachO &MMIMachO =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
MachineModuleInfoImpl::StubValueTy &StubSym =
@@ -278,7 +312,7 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
OutStreamer.EmitRawText(OS.str());
}
}
-
+
OutStreamer.EmitLabel(CurrentFnSym);
}
@@ -358,7 +392,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
case MachineOperand::MO_ExternalSymbol: {
bool isCallOp = Modifier && !strcmp(Modifier, "call");
O << *GetExternalSymbolSymbol(MO.getSymbolName());
-
+
if (isCallOp && Subtarget->isTargetELF() &&
TM.getRelocationModel() == Reloc::PIC_)
O << "(PLT)";
@@ -438,15 +472,13 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op,
O << getRegisterName(MO1.getReg());
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
- << " ";
-
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
+ O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ } else if (ShOpc != ARM_AM::rrx) {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
}
}
@@ -575,16 +607,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- O << ARM_AM::getAMSubModeStr(Mode);
- return;
- } else if (Modifier && strcmp(Modifier, "base") == 0) {
- // Used for FSTM{D|S} and LSTM{D|S} operations.
- O << getRegisterName(MO1.getReg());
- return;
- }
-
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
@@ -641,6 +663,32 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op,
O << "#" << lsb << ", #" << width;
}
+void
+ARMAsmPrinter::printMemBOption(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMAsmPrinter::printShiftImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
//===--------------------------------------------------------------------===//
void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op,
@@ -737,12 +785,11 @@ void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum,
O << getRegisterName(Reg);
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
- << " ";
-
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
}
void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI,
@@ -916,12 +963,12 @@ void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
-
+
unsigned JTI = MO1.getIndex();
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
// Can't use EmitLabel until instprinter happens, label comes out in the wrong
// order.
- O << *JTISymbol << ":\n";
+ O << "\n" << *JTISymbol << ":\n";
const char *JTEntryDirective = MAI->getData32bitsDirective();
@@ -958,12 +1005,12 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO1 = MI->getOperand(OpNum);
const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id
unsigned JTI = MO1.getIndex();
-
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm());
-
+
// Can't use EmitLabel until instprinter happens, label comes out in the wrong
// order.
- O << *JTISymbol << ":\n";
+ O << "\n" << *JTISymbol << ":\n";
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
@@ -980,7 +1027,7 @@ void ARMAsmPrinter::printJT2BlockOperand(const MachineInstr *MI, int OpNum,
O << MAI->getData8bitsDirective();
else if (HalfWordOffset)
O << MAI->getData16bitsDirective();
-
+
if (ByteOffset || HalfWordOffset)
O << '(' << *MBB->getSymbol() << "-" << *JTISymbol << ")/2";
else
@@ -1086,10 +1133,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
printInstructionThroughMCStreamer(MI);
return;
}
-
+
if (MI->getOpcode() == ARM::CONSTPOOL_ENTRY)
EmitAlignment(2);
-
+
SmallString<128> Str;
raw_svector_ostream OS(Str);
if (MI->getOpcode() == ARM::DBG_VALUE) {
@@ -1112,7 +1159,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
printInstruction(MI, OS);
OutStreamer.EmitRawText(OS.str());
-
+
// Make sure the instruction that follows TBB is 2-byte aligned.
// FIXME: Constant island pass should insert an "ALIGN" instruction instead.
if (MI->getOpcode() == ARM::t2TBB)
@@ -1129,7 +1176,7 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
// avoid out-of-range branches that are due a fundamental limitation of
// the way symbol offsets are encoded with the current Darwin ARM
// relocations.
- const TargetLoweringObjectFileMachO &TLOFMacho =
+ const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(
getObjFileLowering());
OutStreamer.SwitchSection(TLOFMacho.getTextSection());
@@ -1148,6 +1195,12 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
16, SectionKind::getText());
OutStreamer.SwitchSection(sect);
}
+ const MCSection *StaticInitSect =
+ OutContext.getMachOSection("__TEXT", "__StaticInit",
+ MCSectionMachO::S_REGULAR |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ SectionKind::getText());
+ OutStreamer.SwitchSection(StaticInitSect);
}
}
@@ -1173,8 +1226,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer.EmitRawText("\t.eabi_attribute " +
Twine(ARMBuildAttrs::ABI_FP_exceptions) + ", 1");
}
-
- if (FiniteOnlyFPMath())
+
+ if (NoInfsFPMath && NoNaNsFPMath)
OutStreamer.EmitRawText("\t.eabi_attribute " +
Twine(ARMBuildAttrs::ABI_FP_number_model)+ ", 1");
else
@@ -1280,7 +1333,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// LPC0:
// add r0, pc, r0
// This adds the address of LPC0 to r0.
-
+
// Emit the label.
// FIXME: MOVE TO SHARED PLACE.
unsigned Id = (unsigned)MI->getOperand(2).getImm();
@@ -1288,8 +1341,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
  MCSymbol *Label = OutContext.GetOrCreateSymbol(Twine(Prefix)
+ "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
OutStreamer.EmitLabel(Label);
-
-
+
+
  // Form and emit the add.
MCInst AddInst;
AddInst.setOpcode(ARM::ADDrr);
@@ -1315,7 +1368,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
else
EmitGlobalConstant(MCPE.Val.ConstVal);
-
+
return;
}
case ARM::MOVi2pieces: { // FIXME: Remove asmstring from td file.
@@ -1325,13 +1378,13 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
-
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVi);
TmpInst.addOperand(MCOperand::CreateReg(DstReg));
TmpInst.addOperand(MCOperand::CreateImm(SOImmValV1));
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
@@ -1349,11 +1402,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
TmpInst.addOperand(MCOperand::CreateReg(0)); // cc_out
OutStreamer.EmitInstruction(TmpInst);
}
- return;
+ return;
}
case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file.
// This is a hack that lowers as a two instruction sequence.
@@ -1384,32 +1437,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
TmpInst.setOpcode(ARM::MOVi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(V1); // lower16(imm)
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
OutStreamer.EmitInstruction(TmpInst);
}
-
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVTi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg
TmpInst.addOperand(V2); // upper16(imm)
-
+
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(3).getReg()));
-
+
OutStreamer.EmitInstruction(TmpInst);
}
-
+
return;
}
}
-
+
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 49c16f3e07205..3a8bebe0dd247 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -15,9 +15,9 @@
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
-#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
@@ -501,7 +501,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
llvm_unreachable("Unknown or unset size field for instr!");
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
@@ -573,48 +573,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 0; // Not reached
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool
-ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- switch (MI.getOpcode()) {
- default: break;
- case ARM::VMOVS:
- case ARM::VMOVD:
- case ARM::VMOVDneon:
- case ARM::VMOVQ:
- case ARM::VMOVQQ : {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
- case ARM::MOVr:
- case ARM::MOVr_TC:
- case ARM::tMOVr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2gpr:
- case ARM::t2MOVr: {
- assert(MI.getDesc().getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "Invalid ARM MOV instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
- }
-
- return false;
-}
-
unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
@@ -763,8 +721,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Align);
// tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
+ // certain registers. Just treat it as GPR here. Likewise, rGPR.
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
RC = ARM::GPRRegisterClass;
switch (RC->getID()) {
@@ -798,7 +757,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
@@ -818,7 +777,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -830,7 +789,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
@@ -865,7 +824,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass
+ || RC == ARM::rGPRRegisterClass)
RC = ARM::GPRRegisterClass;
switch (RC->getID()) {
@@ -893,7 +853,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
.addMemOperand(MMO));
}
break;
@@ -910,7 +870,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -922,7 +882,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
.addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)))
.addMemOperand(MMO);
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
@@ -963,6 +923,11 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
unsigned PCLabelId = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
if (ACPV->isGlobalValue())
NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
ARMCP::CPValue, 4);
@@ -972,6 +937,9 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
else if (ACPV->isBlockAddress())
NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
ARMCP::CPBlockAddress, 4);
+ else if (ACPV->isLSDA())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId,
+ ARMCP::CPLSDA, 4);
else
llvm_unreachable("Unexpected ARM constantpool value type!!");
CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
@@ -1393,3 +1361,63 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = (isSub) ? -Offset : Offset;
return Offset == 0;
}
+
+bool ARMBaseInstrInfo::
+AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::CMPri:
+ case ARM::CMPzri:
+ case ARM::t2CMPri:
+ case ARM::t2CMPzri:
+ SrcReg = MI->getOperand(0).getReg();
+ CmpValue = MI->getOperand(1).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+/// ConvertToSetZeroFlag - Convert the instruction to set the "zero" flag so
+/// that we can remove a "comparison with zero".
+bool ARMBaseInstrInfo::
+ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {
+ // Conservatively refuse to convert an instruction which isn't in the same BB
+ // as the comparison.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that CPSR isn't set between the comparison instruction and the one we
+ // want to change.
+ MachineBasicBlock::const_iterator I = CmpInstr, E = MI;
+ --I;
+ for (; I != E; --I) {
+ const MachineInstr &Instr = *I;
+
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (!MO.isReg() || !MO.isDef()) continue;
+
+ // This instruction modifies CPSR before the one we want to change. We
+ // can't do this transformation.
+ if (MO.getReg() == ARM::CPSR)
+ return false;
+ }
+ }
+
+ // Set the "zero" bit in CPSR.
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::ADDri:
+ case ARM::SUBri:
+ case ARM::t2ADDri:
+ case ARM::t2SUBri:
+ MI->RemoveOperand(5);
+ MachineInstrBuilder(MI)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+ CmpInstr->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
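
Editorial note: the two hooks added above are meant to be driven by a peephole pass. A minimal sketch of such a caller follows; it is illustrative only and not part of this patch. The function name tryEliminateCmpZero is hypothetical, and the sketch assumes SrcReg is a virtual register with a single definition and that the usual CodeGen headers are included.

    // Sketch: fold "cmp rX, #0" into the instruction defining rX, assuming
    // rX is a virtual register with a single definition.
    static bool tryEliminateCmpZero(MachineInstr *CmpInstr,
                                    const ARMBaseInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
      unsigned SrcReg = 0;
      int CmpValue = 0;
      if (!TII.AnalyzeCompare(CmpInstr, SrcReg, CmpValue) || CmpValue != 0)
        return false;                       // only compares against zero
      MachineInstr *DefMI = MRI.getVRegDef(SrcReg);
      if (!DefMI)
        return false;
      // On success this turns e.g. SUBri into a CPSR-setting form and
      // erases the now-redundant CMP.
      return TII.ConvertToSetZeroFlag(DefMI, CmpInstr);
    }
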
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 89a2db74a75ef..b4f4a33a70adf 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -15,11 +15,12 @@
#define ARMBASEINSTRUCTIONINFO_H
#include "ARM.h"
-#include "ARMRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
namespace llvm {
+ class ARMSubtarget;
+ class ARMBaseRegisterInfo;
/// ARMII - This namespace holds all of the target specific flags that
/// instruction info tracks.
@@ -97,44 +98,45 @@ namespace ARMII {
// Miscellaneous arithmetic instructions
ArithMiscFrm = 12 << FormShift,
+ SatFrm = 13 << FormShift,
// Extend instructions
- ExtFrm = 13 << FormShift,
+ ExtFrm = 14 << FormShift,
// VFP formats
- VFPUnaryFrm = 14 << FormShift,
- VFPBinaryFrm = 15 << FormShift,
- VFPConv1Frm = 16 << FormShift,
- VFPConv2Frm = 17 << FormShift,
- VFPConv3Frm = 18 << FormShift,
- VFPConv4Frm = 19 << FormShift,
- VFPConv5Frm = 20 << FormShift,
- VFPLdStFrm = 21 << FormShift,
- VFPLdStMulFrm = 22 << FormShift,
- VFPMiscFrm = 23 << FormShift,
+ VFPUnaryFrm = 15 << FormShift,
+ VFPBinaryFrm = 16 << FormShift,
+ VFPConv1Frm = 17 << FormShift,
+ VFPConv2Frm = 18 << FormShift,
+ VFPConv3Frm = 19 << FormShift,
+ VFPConv4Frm = 20 << FormShift,
+ VFPConv5Frm = 21 << FormShift,
+ VFPLdStFrm = 22 << FormShift,
+ VFPLdStMulFrm = 23 << FormShift,
+ VFPMiscFrm = 24 << FormShift,
// Thumb format
- ThumbFrm = 24 << FormShift,
+ ThumbFrm = 25 << FormShift,
// Miscellaneous format
- MiscFrm = 25 << FormShift,
+ MiscFrm = 26 << FormShift,
// NEON formats
- NGetLnFrm = 26 << FormShift,
- NSetLnFrm = 27 << FormShift,
- NDupFrm = 28 << FormShift,
- NLdStFrm = 29 << FormShift,
- N1RegModImmFrm= 30 << FormShift,
- N2RegFrm = 31 << FormShift,
- NVCVTFrm = 32 << FormShift,
- NVDupLnFrm = 33 << FormShift,
- N2RegVShLFrm = 34 << FormShift,
- N2RegVShRFrm = 35 << FormShift,
- N3RegFrm = 36 << FormShift,
- N3RegVShFrm = 37 << FormShift,
- NVExtFrm = 38 << FormShift,
- NVMulSLFrm = 39 << FormShift,
- NVTBLFrm = 40 << FormShift,
+ NGetLnFrm = 27 << FormShift,
+ NSetLnFrm = 28 << FormShift,
+ NDupFrm = 29 << FormShift,
+ NLdStFrm = 30 << FormShift,
+ N1RegModImmFrm= 31 << FormShift,
+ N2RegFrm = 32 << FormShift,
+ NVCVTFrm = 33 << FormShift,
+ NVDupLnFrm = 34 << FormShift,
+ N2RegVShLFrm = 35 << FormShift,
+ N2RegVShRFrm = 36 << FormShift,
+ N3RegFrm = 37 << FormShift,
+ N3RegVShFrm = 38 << FormShift,
+ NVExtFrm = 39 << FormShift,
+ NVMulSLFrm = 40 << FormShift,
+ NVTBLFrm = 41 << FormShift,
//===------------------------------------------------------------------===//
// Misc flags.
@@ -198,7 +200,7 @@ namespace ARMII {
}
class ARMBaseInstrInfo : public TargetInstrInfoImpl {
- const ARMSubtarget& Subtarget;
+ const ARMSubtarget &Subtarget;
protected:
// Can be only subclassed.
explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
@@ -223,7 +225,7 @@ public:
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
+ bool AllowModify = false) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
@@ -262,12 +264,6 @@ public:
///
virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
@@ -341,6 +337,17 @@ public:
unsigned NumInstrs) const {
return NumInstrs == 1;
}
+
+ /// AnalyzeCompare - For a comparison instruction, return the source register
+ /// in SrcReg and the value it compares against in CmpValue. Return true if
+ /// the comparison instruction can be analyzed.
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ int &CmpValue) const;
+
+ /// ConvertToSetZeroFlag - Convert the instruction to set the zero flag so
+ /// that we can remove a "comparison with zero".
+ virtual bool ConvertToSetZeroFlag(MachineInstr *Instr,
+ MachineInstr *CmpInstr) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 182bd99371457..eceafad63f17a 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -40,13 +40,20 @@
#include "llvm/Support/CommandLine.h"
namespace llvm {
-cl::opt<bool>
-ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true),
- cl::desc("Reuse repeated frame index values"));
+static cl::opt<bool>
+ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false),
+ cl::desc("Force use of virtual base registers for stack load/store"));
+static cl::opt<bool>
+EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden,
+ cl::desc("Enable pre-regalloc stack frame index allocation"));
}
using namespace llvm;
+static cl::opt<bool>
+EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
bool *isSPVFP) {
if (isSPVFP)
@@ -143,7 +150,8 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
TII(tii), STI(sti),
- FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
+ BasePtr(ARM::R6) {
}
const unsigned*
@@ -176,8 +184,11 @@ getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(ARM::SP);
Reserved.set(ARM::PC);
- if (STI.isTargetDarwin() || hasFP(MF))
+ Reserved.set(ARM::FPSCR);
+ if (hasFP(MF))
Reserved.set(FramePtr);
+ if (hasBasePointer(MF))
+ Reserved.set(BasePtr);
// Some targets reserve R9.
if (STI.isR9Reserved())
Reserved.set(ARM::R9);
@@ -191,9 +202,13 @@ bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
case ARM::SP:
case ARM::PC:
return true;
+ case ARM::R6:
+ if (hasBasePointer(MF))
+ return true;
+ break;
case ARM::R7:
case ARM::R11:
- if (FramePtr == Reg && (STI.isTargetDarwin() || hasFP(MF)))
+ if (FramePtr == Reg && hasFP(MF))
return true;
break;
case ARM::R9:
@@ -510,7 +525,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPREven1,
GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
@@ -539,7 +554,7 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
return std::make_pair(RC->allocation_order_begin(MF),
RC->allocation_order_end(MF));
- if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!hasFP(MF)) {
if (!STI.isR9Reserved())
return std::make_pair(GPROdd1,
GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
@@ -609,30 +624,68 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
/// or if frame pointer elimination is disabled.
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
+ // Mac OS X requires FP not to be clobbered for backtracing purposes.
+ if (STI.isTargetDarwin())
+ return true;
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return ((DisableFramePointerElim(MF) && MFI->adjustsStack())||
+ // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) ||
needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
}
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ if (!EnableBasePointer)
+ return false;
+
+ if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ return true;
+
+ // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+ // negative range for ldr/str (255 bytes), and Thumb1 supports positive
+ // offsets only, so it's better to use the SP or the base pointer instead.
+ // When there are variable sized objects, we can't reference off of the SP,
+ // so we reserve a base pointer.
+ if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach the locals. If a function has a
+ // smallish frame, it's less likely to have lots of spill and callee-saved
+ // space, so everything is more likely to be within range of the frame
+ // pointer. If the estimate is wrong, the register scavenger will still
+ // make the access work; it just won't be optimal.
+ if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ return false;
+ return true;
+ }
+
+ return false;
+}
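
For intuition, here is a hypothetical source-level example of the frame shape this is guarding against (a sketch; 'use' is a stand-in and the alignment numbers are made up). The over-aligned local forces dynamic realignment, so the FP is repurposed, while the alloca makes SP-relative offsets unknowable, leaving R6 as the only stable base:

    #include <alloca.h>
    extern void use(void *a, void *b);
    // Needs both dynamic realignment (32-byte-aligned local, above the
    // default 8-byte stack alignment) and a variable sized allocation,
    // which is the case where hasBasePointer() reserves R6.
    void f(unsigned n) {
      __attribute__((aligned(32))) char big[64];
      void *dyn = alloca(n);
      use(big, dyn);
    }
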
+
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- return (RealignStack &&
- !AFI->isThumb1OnlyFunction() &&
- !MFI->hasVarSizedObjects());
+ // We can't realign the stack if:
+ // 1. Dynamic stack realignment is explicitly disabled,
+ // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+ // 3. There are VLAs in the function and the base pointer is disabled.
+ return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+ (!MFI->hasVarSizedObjects() || EnableBasePointer));
}
bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
- return (RealignStack &&
- !AFI->isThumb1OnlyFunction() &&
- (MFI->getMaxAlignment() > StackAlign) &&
- !MFI->hasVarSizedObjects());
+ bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
}
bool ARMBaseRegisterInfo::
@@ -668,6 +721,7 @@ static unsigned estimateStackSize(MachineFunction &MF) {
/// instructions will require a scratch register during their expansion later.
unsigned
ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
+ const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Limit = (1 << 12) - 1;
for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
@@ -693,7 +747,10 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
break;
case ARMII::AddrModeT2_i12:
- if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+ // i12 supports only positive offset so these will be converted to
+ // i8 opcodes. See llvm::rewriteT2FrameIndex.
+ if (hasFP(MF) && AFI->hasStackFrame())
+ Limit = std::min(Limit, (1U << 8) - 1);
break;
case ARMII::AddrMode6:
// Addressing mode 6 (load/store) instructions can't encode an
@@ -710,6 +767,19 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
return Limit;
}
+static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
+ const ARMBaseInstrInfo &TII) {
+ unsigned FnSize = 0;
+ for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
+ I != E; ++I)
+ FnSize += TII.GetInstSizeInBytes(I);
+ }
+ return FnSize;
+}
+
void
ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
@@ -737,6 +807,10 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0)
MF.getRegInfo().setPhysRegUsed(ARM::LR);
+ // Spill the BasePtr if it's used.
+ if (hasBasePointer(MF))
+ MF.getRegInfo().setPhysRegUsed(BasePtr);
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
@@ -807,7 +881,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
bool ForceLRSpill = false;
if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
- unsigned FnSize = TII.GetFunctionSizeInBytes(MF);
+ unsigned FnSize = GetFunctionSizeInBytes(MF, TII);
// Force LR to be spilled if the Thumb function size is > 2048. This enables
// use of BL to implement far jump. If it turns out that it's not needed
// then the branch fix up path will undo it.
@@ -824,13 +898,19 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// slot of the previous FP. Also, if we have variable sized objects in the
// function, stack slot references will often be negative, and some of
// our instructions are positive-offset only, so conservatively consider
- // that case to want a spill slot (or register) as well.
+ // that case to want a spill slot (or register) as well. Similarly, if
+ // the function adjusts the stack pointer during execution and the
+ // adjustments aren't already part of our stack size estimate, our offset
+ // calculations may be off, so be conservative.
// FIXME: We could add logic to be more precise about negative offsets
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
bool BigStack =
- (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
- estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects();
+ (RS &&
+ (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >=
+ estimateRSStackSizeLimit(MF)))
+ || MFI->hasVarSizedObjects()
+ || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
@@ -848,9 +928,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
ExtraCSSpill = true;
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ if (hasFP(MF)) {
MF.getRegInfo().setPhysRegUsed(FramePtr);
NumGPRSpills++;
}
@@ -941,55 +1019,88 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
return ARM::LR;
}
-unsigned
+unsigned
ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (hasFP(MF))
return FramePtr;
return ARM::SP;
}
+// Provide a base+offset reference to an FI slot for debug info. It's the
+// same as what we use for resolving the code-gen references for now.
+// FIXME: This can go wrong when references are SP-relative and simple call
+// frames aren't used.
int
ARMBaseRegisterInfo::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
+ return ResolveFrameIndexReference(MF, FI, FrameReg, 0);
+}
+
+int
+ARMBaseRegisterInfo::ResolveFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg,
+ int SPAdj) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset();
bool isFixed = MFI->isFixedObjectIndex(FI);
FrameReg = ARM::SP;
+ Offset += SPAdj;
if (AFI->isGPRCalleeSavedArea1Frame(FI))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ return Offset - AFI->getGPRCalleeSavedArea1Offset();
else if (AFI->isGPRCalleeSavedArea2Frame(FI))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ return Offset - AFI->getGPRCalleeSavedArea2Offset();
else if (AFI->isDPRCalleeSavedAreaFrame(FI))
- Offset -= AFI->getDPRCalleeSavedAreaOffset();
- else if (needsStackRealignment(MF)) {
- // When dynamically realigning the stack, use the frame pointer for
- // parameters, and the stack pointer for locals.
+ return Offset - AFI->getDPRCalleeSavedAreaOffset();
+
+ // When dynamically realigning the stack, use the frame pointer for
+ // parameters, and the stack/base pointer for locals.
+ if (needsStackRealignment(MF)) {
assert (hasFP(MF) && "dynamic stack realignment without a FP!");
if (isFixed) {
FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
+ Offset = FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(hasBasePointer(MF) &&
+ "VLAs and dynamic stack alignment, but missing base pointer!");
+ FrameReg = BasePtr;
}
- } else if (hasFP(MF) && AFI->hasStackFrame()) {
- if (isFixed || MFI->hasVarSizedObjects()) {
- // Use frame pointer to reference fixed objects unless this is a
- // frameless function.
+ return Offset;
+ }
+
+ // If there is a frame pointer, use it when we can.
+ if (hasFP(MF) && AFI->hasStackFrame()) {
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ if (isFixed || (MFI->hasVarSizedObjects() && !hasBasePointer(MF))) {
FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
+ return FPOffset;
+ } else if (MFI->hasVarSizedObjects()) {
+ assert(hasBasePointer(MF) && "missing base pointer!");
+ // Use the base register since we have it.
+ FrameReg = BasePtr;
} else if (AFI->isThumb2Function()) {
- // In Thumb2 mode, the negative offset is very limited.
- int FPOffset = Offset - AFI->getFramePtrSpillOffset();
+ // In Thumb2 mode, the negative offset is very limited. Try to avoid
+ // out of range references.
if (FPOffset >= -255 && FPOffset < 0) {
FrameReg = getFrameRegister(MF);
- Offset = FPOffset;
+ return FPOffset;
}
+ } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
+ // Otherwise, use SP or FP, whichever is closer to the stack slot.
+ FrameReg = getFrameRegister(MF);
+ return FPOffset;
}
}
+ // Use the base pointer if we have one.
+ if (hasBasePointer(MF))
+ FrameReg = BasePtr;
return Offset;
}
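
To make the fall-through order above concrete, a hand-worked trace with made-up numbers: with Offset = 24 and a frame pointer spill offset of 16, a fixed object in a realigned frame resolves to (FP, FPOffset = 8); a local in that same frame with VLAs present resolves to (BasePtr, 24); and only when no earlier case fires does the final fallback return (BasePtr, Offset) if a base pointer exists, else (SP, Offset).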
-
int
ARMBaseRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
int FI) const {
@@ -1024,7 +1135,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
case ARM::R5:
return ARM::R4;
case ARM::R7:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6;
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R6;
case ARM::R9:
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8;
case ARM::R11:
@@ -1113,7 +1225,8 @@ unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg,
case ARM::R4:
return ARM::R5;
case ARM::R6:
- return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7;
+ return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6))
+ ? 0 : ARM::R7;
case ARM::R8:
return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9;
case ARM::R10:
@@ -1220,13 +1333,18 @@ requiresFrameIndexScavenging(const MachineFunction &MF) const {
return true;
}
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ return EnableLocalStackAlloc;
+}
+
// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
// not required, we reserve argument space for call sites in the function
// immediately on entry to the current function. This eliminates the need for
// add/sub sp brackets around call sites. Returns true if the call frame is
// included as part of the stack frame.
bool ARMBaseRegisterInfo::
-hasReservedCallFrame(MachineFunction &MF) const {
+hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -1244,7 +1362,7 @@ hasReservedCallFrame(MachineFunction &MF) const {
// is not sufficient here since we still may reference some objects via SP
// even when FP is available in Thumb2 mode.
bool ARMBaseRegisterInfo::
-canSimplifyCallFramePseudos(MachineFunction &MF) const {
+canSimplifyCallFramePseudos(const MachineFunction &MF) const {
return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
}
@@ -1305,10 +1423,258 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ int64_t InstrOffs = 0;
+ int Scale = 1;
+ unsigned ImmIdx = 0;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative offsets, and i12 only positive ones, so
+ // based on the sign of Offset, consider the appropriate instruction.
+ InstrOffs = MI->getOperand(Idx+1).getImm();
+ Scale = 1;
+ break;
+ case ARMII::AddrMode5: {
+ // VFP address mode.
+ const MachineOperand &OffOp = MI->getOperand(Idx+1);
+ InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+ if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ Scale = 4;
+ break;
+ }
+ case ARMII::AddrMode2: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrMode3: {
+ ImmIdx = Idx+2;
+ InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+ if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+ InstrOffs = -InstrOffs;
+ break;
+ }
+ case ARMII::AddrModeT1_s: {
+ ImmIdx = Idx+1;
+ InstrOffs = MI->getOperand(ImmIdx).getImm();
+ Scale = 4;
+ break;
+ }
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ return InstrOffs * Scale;
+}
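
A hand-worked decode of the AddrMode5 case above: a VFP load whose AM5 operand encodes {sub, imm8 = 3} yields InstrOffs = -3, and the final scaling turns that into -3 * 4 = -12 bytes.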
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+ assert(i < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
+ case ARM::LDR: case ARM::LDRH: case ARM::LDRB:
+ case ARM::STR: case ARM::STRH: case ARM::STRB:
+ case ARM::t2LDRi12: case ARM::t2LDRi8:
+ case ARM::t2STRi12: case ARM::t2STRi8:
+ case ARM::VLDRS: case ARM::VLDRD:
+ case ARM::VSTRS: case ARM::VSTRD:
+ case ARM::tSTRspi: case ARM::tLDRspi:
+ if (ForceAllBaseRegAlloc)
+ return true;
+ break;
+ default:
+ return false;
+ }
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer.
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all callee-saved registers get pushed. R4-R6
+ // will be earlier than the FP, so we ignore those.
+ // R7, LR
+ int64_t FPOffset = Offset - 8;
+ // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+ if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+ FPOffset -= 80;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relating to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset = -Offset;
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ if (hasFP(MF) &&
+ !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ if (isFrameOffsetLegal(MI, FPOffset))
+ return false;
+ }
+ // If we can reference via the stack pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal; we want to allocate a virtual base register.
+ return true;
+}
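
A hand-worked instance of the estimate with made-up numbers: take a VLDRS (AddrMode5, so roughly +/-1020 bytes of reach) at incoming Offset -1200 in an ARM function with a 1600-byte local frame. The FP guess is -1200 - 8 - 80 = -1288, out of range; the SP guess is 1200 + 1600 + 128 = 2928, also out of range, so the function returns true and a virtual base register gets allocated.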
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for
+/// BaseReg to be a pointer to FrameIdx before insertion point I.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock::iterator I, unsigned BaseReg,
+ int FrameIdx, int64_t Offset) const {
+ ARMFunctionInfo *AFI =
+ I->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+ (AFI->isThumb1OnlyFunction() ? ARM::tADDrSPi : ARM::t2ADDri);
+
+ MachineInstrBuilder MIB =
+ BuildMI(*I->getParent(), I, I->getDebugLoc(), TII.get(ADDriOpc), BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+ if (!AFI->isThumb1OnlyFunction())
+ AddDefaultCC(AddDefaultPred(MIB));
+}
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ assert(!AFI->isThumb1OnlyFunction() &&
+ "This resolveFrameIndex does not support Thumb1!");
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = false;
+ if (!AFI->isThumbFunction())
+ Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+ else {
+ assert(AFI->isThumb2Function());
+ Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+ }
+ assert (Done && "Unable to resolve frame index!");
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ const TargetInstrDesc &Desc = MI->getDesc();
+ unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+ unsigned i = 0;
+
+ while (!MI->getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ // AddrMode4 and AddrMode6 cannot handle any offset.
+ if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+ return Offset == 0;
+
+ unsigned NumBits = 0;
+ unsigned Scale = 1;
+ bool isSigned = true;
+ switch (AddrMode) {
+ case ARMII::AddrModeT2_i8:
+ case ARMII::AddrModeT2_i12:
+ // i8 supports only negative offsets, and i12 only positive ones, so
+ // based on the sign of Offset, consider the appropriate instruction.
+ Scale = 1;
+ if (Offset < 0) {
+ NumBits = 8;
+ Offset = -Offset;
+ } else {
+ NumBits = 12;
+ }
+ break;
+ case ARMII::AddrMode5:
+ // VFP address mode.
+ NumBits = 8;
+ Scale = 4;
+ break;
+ case ARMII::AddrMode2:
+ NumBits = 12;
+ break;
+ case ARMII::AddrMode3:
+ NumBits = 8;
+ break;
+ case ARMII::AddrModeT1_s:
+ NumBits = 5;
+ Scale = 4;
+ isSigned = false;
+ break;
+ default:
+ llvm_unreachable("Unsupported addressing mode!");
+ break;
+ }
+
+ Offset += getFrameIndexInstrOffset(MI, i);
+ // Make sure the offset is encodable for instructions that scale the
+ // immediate.
+ if ((Offset & (Scale-1)) != 0)
+ return false;
+
+ if (isSigned && Offset < 0)
+ Offset = -Offset;
+
+ unsigned Mask = (1 << NumBits) - 1;
+ if ((unsigned)Offset <= Mask * Scale)
+ return true;
+
+ return false;
+}
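
As a self-contained cross-check of the Thumb2 arm of the switch above, a standalone sketch (illustrative only, not the LLVM API):

    #include <cstdint>
    // Mirrors the AddrModeT2_i8/i12 case: negative offsets use the 8-bit
    // form, positive ones the 12-bit form, and the scale is 1.
    static bool isT2FrameOffsetEncodable(int64_t Offset) {
      if (Offset < 0)
        return -Offset <= 0xFF;   // t2 i8: up to 255 below the base
      return Offset <= 0xFFF;     // t2 i12: up to 4095 above the base
    }
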
+
+void
ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
unsigned i = 0;
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
@@ -1325,16 +1691,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(i).getIndex();
unsigned FrameReg;
- int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
- if (FrameReg != ARM::SP)
- SPAdj = 0;
- Offset += SPAdj;
+ int Offset = ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
// Special handling of dbg_value instructions.
if (MI.isDebugValue()) {
MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
+ return;
}
// Modify MI as necessary to handle as much of 'Offset' as possible
@@ -1346,7 +1709,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
}
if (Done)
- return 0;
+ return;
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above, handle the rest, providing a register that is
@@ -1366,10 +1729,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
- if (Value) {
- Value->first = FrameReg; // use the frame register as a kind indicator
- Value->second = Offset;
- }
if (!AFI->isThumbFunction())
emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
Offset, Pred, PredReg, TII);
@@ -1379,10 +1738,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset, Pred, PredReg, TII);
}
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
- if (!ReuseFrameIndexVals)
- ScratchReg = 0;
}
- return ScratchReg;
}
/// Move iterator past the next bunch of callee save load / store ops for
@@ -1494,7 +1850,8 @@ emitPrologue(MachineFunction &MF) const {
// Otherwise, if this is not Darwin, all the callee-saved registers go
// into spill area 1, including the FP in R11. In either case, it is
// now safe to emit this assignment.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ bool HasFP = hasFP(MF);
+ if (HasFP) {
unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri;
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr)
@@ -1513,7 +1870,7 @@ emitPrologue(MachineFunction &MF) const {
unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
- if (STI.isTargetDarwin() || hasFP(MF))
+ if (HasFP)
AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
@@ -1525,18 +1882,22 @@ emitPrologue(MachineFunction &MF) const {
if (NumBytes) {
// Adjust SP after all the callee-save spills.
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes);
+ if (HasFP)
+ AFI->setShouldRestoreSPFromFP(true);
}
if (STI.isTargetELF() && hasFP(MF)) {
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
+ AFI->setShouldRestoreSPFromFP(true);
}
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
- // If we need dynamic stack realignment, do it here.
+ // If we need dynamic stack realignment, do it here. Be paranoid and make
+ // sure if we also have VLAs, we have a base pointer for frame access.
if (needsStackRealignment(MF)) {
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
@@ -1562,7 +1923,28 @@ emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
.addReg(ARM::R4, RegState::Kill);
}
+
+ AFI->setShouldRestoreSPFromFP(true);
+ }
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (hasBasePointer(MF)) {
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), BasePtr)
+ .addReg(ARM::SP)
+ .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr)
+ .addReg(ARM::SP);
}
+
+ // If the frame has variable sized objects then the epilogue must restore
+ // the sp from fp.
+ if (!AFI->shouldRestoreSPFromFP() && MFI->hasVarSizedObjects())
+ AFI->setShouldRestoreSPFromFP(true);
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -1617,34 +1999,25 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- bool HasFP = hasFP(MF);
- if ((STI.isTargetDarwin() && NumBytes) || HasFP) {
+ // Reset SP based on frame pointer only if the stack frame extends beyond
+ // frame pointer stack slot or target is ELF and the function has FP.
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
- // Reset SP based on frame pointer only if the stack frame extends beyond
- // frame pointer stack slot or target is ELF and the function has FP.
- if (HasFP ||
- AFI->getGPRCalleeSavedArea2Size() ||
- AFI->getDPRCalleeSavedAreaSize() ||
- AFI->getDPRCalleeSavedAreaOffset()) {
- if (NumBytes) {
- if (isARM)
- emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- else
- emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
- ARMCC::AL, 0, TII);
- } else {
- // Thumb2 or ARM.
- if (isARM)
- BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
- .addReg(FramePtr)
- .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
- else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
- .addReg(FramePtr);
- }
+ if (NumBytes) {
+ if (isARM)
+ emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ else
+ emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
+ ARMCC::AL, 0, TII);
+ } else {
+ // Thumb2 or ARM.
+ if (isARM)
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
+ .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
+ .addReg(FramePtr);
}
} else if (NumBytes)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
@@ -1670,7 +2043,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
// Jump to label or value in register.
if (RetOpcode == ARM::TCRETURNdi) {
- BuildMI(MBB, MBBI, dl,
+ BuildMI(MBB, MBBI, dl,
TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
JumpTarget.getTargetFlags());
@@ -1685,7 +2058,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
} else if (RetOpcode == ARM::TCRETURNriND) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
addReg(JumpTarget.getReg(), RegState::Kill);
- }
+ }
MachineInstr *NewMI = prior(MBBI);
for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index f7ee0d5cc66d0..fa2eb6c10498e 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -44,7 +44,7 @@ static inline bool isARMLowRegister(unsigned Reg) {
}
}
-struct ARMBaseRegisterInfo : public ARMGenRegisterInfo {
+class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
protected:
const ARMBaseInstrInfo &TII;
const ARMSubtarget &STI;
@@ -52,6 +52,11 @@ protected:
/// FramePtr - ARM physical register used as frame ptr.
unsigned FramePtr;
+ /// BasePtr - ARM physical register used as a base ptr in complex stack
+ /// frames. I.e., when we need a third base register, beyond SP and FP,
+ /// due to variable-sized stack objects.
+ unsigned BasePtr;
+
// Can be only subclassed.
explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &STI);
@@ -102,9 +107,18 @@ public:
MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
+ bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const;
+ int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ void materializeFrameBaseRegister(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
+ bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
bool cannotEliminateFrame(const MachineFunction &MF) const;
@@ -116,6 +130,8 @@ public:
unsigned getFrameRegister(const MachineFunction &MF) const;
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const;
+ int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg, int SPAdj) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
// Exception handling queries.
@@ -144,16 +160,17 @@ public:
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
- virtual bool hasReservedCallFrame(MachineFunction &MF) const;
- virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const;
+ virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+ virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+ virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 8fdb07f81626a..293e32aa5376e 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -1,4 +1,4 @@
-//===- ARMCallingConv.td - Calling Conventions for ARM ----------*- C++ -*-===//
+//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -68,7 +68,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[
"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
- CCIfType<[i32], CCIfAlign<"8", CCAssignToStack<4, 8>>>,
+ CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[f64], CCAssignToStack<8, 8>>,
CCIfType<[v2f64], CCAssignToStack<16, 8>>
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 7895cb0719229..b1a702f90cfc3 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -65,7 +65,7 @@ namespace {
static char ID;
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(&ID), JTI(0),
+ : MachineFunctionPass(ID), JTI(0),
II((const ARMInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
MCE(mce), MCPEs(0), MJTEs(0),
@@ -124,6 +124,8 @@ namespace {
void emitMiscArithInstruction(const MachineInstr &MI);
+ void emitSaturateInstruction(const MachineInstr &MI);
+
void emitBranchInstruction(const MachineInstr &MI);
void emitInlineJumpTable(unsigned JTIndex);
@@ -389,6 +391,9 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
case ARMII::ArithMiscFrm:
emitMiscArithInstruction(MI);
break;
+ case ARMII::SatFrm:
+ emitSaturateInstruction(MI);
+ break;
case ARMII::BrFrm:
emitBranchInstruction(MI);
break;
@@ -654,6 +659,19 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
switch (Opcode) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+ case ARM::BX:
+ case ARM::BMOVPCRX:
+ case ARM::BXr9:
+ case ARM::BMOVPCRXr9: {
+ // First emit mov lr, pc
+ unsigned Binary = 0x01a0e00f;
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ emitWordLE(Binary);
+
+ // and then emit the branch.
+ emitMiscBranchInstruction(MI);
+ break;
+ }
case TargetOpcode::INLINEASM: {
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
@@ -662,7 +680,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
}
break;
}
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
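
On the magic constant in the BX expansion above: 0x01a0e00f is mov lr, pc with the 4-bit condition field left clear, so OR-ing the predicate into bit 28 (ARMCC::AL is 0xE) produces the familiar 0xE1A0E00F encoding.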
@@ -1209,12 +1227,58 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
// Encode shift_imm.
unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+ if (TID.Opcode == ARM::PKHTB) {
+ assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+ if (ShiftAmt == 32)
+ ShiftAmt = 0;
+ }
assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
Binary |= ShiftAmt << ARMII::ShiftShift;
emitWordLE(Binary);
}
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+
+ // Part of binary is determined by TableGen.
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd
+ Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+ // Encode saturate bit position.
+ unsigned Pos = MI.getOperand(1).getImm();
+ if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16)
+ Pos -= 1;
+ assert((Pos < 16 || (Pos < 32 &&
+ TID.Opcode != ARM::SSAT16 &&
+ TID.Opcode != ARM::USAT16)) &&
+ "saturate bit position out of range");
+ Binary |= Pos << 16;
+
+ // Encode Rm
+ Binary |= getMachineOpValue(MI, 2);
+
+ // Encode shift_imm.
+ if (TID.getNumOperands() == 4) {
+ unsigned ShiftOp = MI.getOperand(3).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ if (Opc == ARM_AM::asr)
+ Binary |= (1 << 6);
+ unsigned ShiftAmt = MI.getOperand(3).getImm();
+ if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+ ShiftAmt = 0;
+ assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+ Binary |= ShiftAmt << ARMII::ShiftShift;
+ }
+
+ emitWordLE(Binary);
+}
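
A hand-worked check of the position encoding: SSAT encodes sat_imm as the saturate width minus one, while USAT encodes it directly, so SSAT r0, #16, r1 and USAT r0, #15, r1 both place Pos = 15 into bits [20:16], which is exactly why the code subtracts 1 only for the SSAT forms.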
+
void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
@@ -1485,7 +1549,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
// Set addressing mode by modifying bits U(23) and P(24)
const MachineOperand &MO = MI.getOperand(OpIdx++);
- Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm()));
+ Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm()));
// Set bit W(21)
if (IsUpdating)
@@ -1494,7 +1558,7 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
// First register is encoded in Dd.
Binary |= encodeVFPRd(MI, OpIdx+2);
- // Number of registers are encoded in offset field.
+ // Count the number of registers.
unsigned NumRegs = 1;
for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 65a3da6f1617c..60e923bd2c385 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -18,9 +18,9 @@
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -165,7 +165,7 @@ namespace {
/// HasInlineAsm - True if the function contains inline assembly.
bool HasInlineAsm;
- const TargetInstrInfo *TII;
+ const ARMInstrInfo *TII;
const ARMSubtarget *STI;
ARMFunctionInfo *AFI;
bool isThumb;
@@ -173,7 +173,7 @@ namespace {
bool isThumb2;
public:
static char ID;
- ARMConstantIslands() : MachineFunctionPass(&ID) {}
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -272,7 +272,7 @@ FunctionPass *llvm::createARMConstantIslandPass() {
bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
MachineConstantPool &MCP = *MF.getConstantPool();
- TII = MF.getTarget().getInstrInfo();
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
AFI = MF.getInfo<ARMFunctionInfo>();
STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
@@ -323,6 +323,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// constant pool users.
InitialFunctionScan(MF, CPEMIs);
CPEMIs.clear();
+ DEBUG(dumpBBs());
+
/// Remove dead constant pool entries.
RemoveUnusedCPEntries();
@@ -355,7 +357,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
}
// Shrink 32-bit Thumb2 branch, load, and store instructions.
- if (isThumb2)
+ if (isThumb2 && !STI->prefers32BitThumb())
MadeChange |= OptimizeThumb2Instructions(MF);
// After a while, this might be made debug-only, but it is not expensive.
@@ -366,6 +368,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
+ DEBUG(errs() << '\n'; dumpBBs());
+
BBSizes.clear();
BBOffsets.clear();
WaterList.clear();
@@ -509,6 +513,10 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
case ARM::tBR_JTr:
// A Thumb1 table jump may involve padding; for the offsets to
// be right, functions containing these must be 4-byte aligned.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr + 2
+ // is aligned. That is held in Offset+MBBSize, which already has
+ // 2 added in for the size of the mov pc instruction.
MF.EnsureAlignment(2U);
if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
// FIXME: Add a pseudo ALIGN instruction instead.
@@ -768,28 +776,54 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
- // Figure out how large the first NewMBB is. (It cannot
- // contain a constpool_entry or tablejump.)
- unsigned NewBBSize = 0;
- for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
- I != E; ++I)
- NewBBSize += TII->GetInstSizeInBytes(I);
-
unsigned OrigBBI = OrigBB->getNumber();
unsigned NewBBI = NewBB->getNumber();
- // Set the size of NewBB in BBSizes.
- BBSizes[NewBBI] = NewBBSize;
- // We removed instructions from UserMBB, subtract that off from its size.
- // Add 2 or 4 to the block to count the unconditional branch we added to it.
int delta = isThumb1 ? 2 : 4;
- BBSizes[OrigBBI] -= NewBBSize - delta;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
// ...and adjust BBOffsets for NewBB accordingly.
BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr *ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
// All BBOffsets following these blocks must be modified.
- AdjustBBOffsetsAfter(NewBB, delta);
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
return NewBB;
}
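
A concrete trace of the padding bookkeeping above (hypothetical offsets): if the tBR_JTr originally sat at offset 100 (aligned, so its jump table was padded), the new 2-byte branch shifts it to 102, the padding disappears, and the two cancel, giving delta = 0. Starting from 102 instead, the shift to 104 introduces padding that wasn't there before, so the block grows by the branch plus 2 bytes of padding, giving delta = 4.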
@@ -915,6 +949,10 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
}
// Thumb1 jump tables require padding. They should be at the end;
// following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
MachineInstr *ThumbJTMI = prior(MBB->end());
if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
@@ -1143,11 +1181,13 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
MachineBasicBlock::iterator MI = UserMI;
++MI;
unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI),
- MI = llvm::next(MI)) {
- if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!OffsetIsInRange(Offset, EndInsertOffset,
U.MaxDisp, U.NegOk, U.IsSoImm)) {
@@ -1159,9 +1199,23 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
CPUIndex++;
}
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
}
+
DEBUG(errs() << "Split in middle of big block\n");
- NewMBB = SplitBlockBeforeInstr(prior(MI));
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = SplitBlockBeforeInstr(MI);
}
}
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 9c62597b4323b..fc2e3c3fadaea 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -19,14 +19,21 @@
#include "ARMBaseInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-
+#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
namespace {
class ARMExpandPseudo : public MachineFunctionPass {
+ // Constants for register spacing in NEON load/store instructions.
+ enum NEONRegSpacing {
+ SingleSpc,
+ EvenDblSpc,
+ OddDblSpc
+ };
+
public:
static char ID;
- ARMExpandPseudo() : MachineFunctionPass(&ID) {}
+ ARMExpandPseudo() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -41,6 +48,10 @@ namespace {
void TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMBB(MachineBasicBlock &MBB);
+ void ExpandVLD(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+ bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
+ void ExpandVST(MachineBasicBlock::iterator &MBBI, unsigned Opc,
+ bool hasWriteBack, NEONRegSpacing RegSpc, unsigned NumRegs);
};
char ARMExpandPseudo::ID = 0;
}
@@ -63,6 +74,129 @@ void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
}
}
+/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VLD instructions with D register operands.
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool hasWriteBack,
+ NEONRegSpacing RegSpc, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ unsigned D0, D1, D2, D3;
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_2);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_2);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_4);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing for VLD");
+ D0 = TRI->getSubReg(DstReg, ARM::dsub_1);
+ D1 = TRI->getSubReg(DstReg, ARM::dsub_3);
+ D2 = TRI->getSubReg(DstReg, ARM::dsub_5);
+ D3 = TRI->getSubReg(DstReg, ARM::dsub_7);
+ }
+ MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 2)
+ MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
+ if (NumRegs > 3)
+ MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
+
+ if (hasWriteBack) {
+ bool WBIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned WBReg = MI.getOperand(OpIdx++).getReg();
+ MIB.addReg(WBReg, RegState::Define | getDeadRegState(WBIsDead));
+ }
+ // Copy the addrmode6 operands.
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
+ MIB.addImm(MI.getOperand(OpIdx++).getImm());
+ if (hasWriteBack) {
+ // Copy the am6offset operand.
+ bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
+ }
+
+ MIB = AddDefaultPred(MIB);
+ TransferImpOps(MI, MIB, MIB);
+ // For an instruction writing the odd subregs, add an implicit use of the
+ // super-register because the even subregs were loaded separately.
+ if (RegSpc == OddDblSpc)
+ MIB.addReg(DstReg, RegState::Implicit);
+ // Add an implicit def for the super-register.
+ MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
+ MI.eraseFromParent();
+}
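+
+// For example, "VLD1q8Pseudo %Q0, <addr>" becomes roughly
+// "VLD1q8 %D0, %D1, <addr>, pred, %Q0<imp-def>", since D0/D1 are the
+// dsub_0/dsub_1 subregisters of Q0 (a sketch; the exact operand printing
+// and register assignment will vary).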
+
+/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
+/// operands to real VST instructions with D register operands.
+void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI,
+ unsigned Opc, bool hasWriteBack,
+ NEONRegSpacing RegSpc, unsigned NumRegs) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock &MBB = *MI.getParent();
+
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
+ unsigned OpIdx = 0;
+ if (hasWriteBack) {
+ bool DstIsDead = MI.getOperand(OpIdx).isDead();
+ unsigned DstReg = MI.getOperand(OpIdx++).getReg();
+ MIB.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
+ }
+ // Copy the addrmode6 operands.
+ bool AddrIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(AddrIsKill));
+ MIB.addImm(MI.getOperand(OpIdx++).getImm());
+ if (hasWriteBack) {
+ // Copy the am6offset operand.
+ bool OffsetIsKill = MI.getOperand(OpIdx).isKill();
+ MIB.addReg(MI.getOperand(OpIdx++).getReg(), getKillRegState(OffsetIsKill));
+ }
+
+ bool SrcIsKill = MI.getOperand(OpIdx).isKill();
+ unsigned SrcReg = MI.getOperand(OpIdx).getReg();
+ unsigned D0, D1, D2, D3;
+ if (RegSpc == SingleSpc) {
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+ } else if (RegSpc == EvenDblSpc) {
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_2);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_4);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_6);
+ } else {
+ assert(RegSpc == OddDblSpc && "unknown register spacing for VST");
+ D0 = TRI->getSubReg(SrcReg, ARM::dsub_1);
+ D1 = TRI->getSubReg(SrcReg, ARM::dsub_3);
+ D2 = TRI->getSubReg(SrcReg, ARM::dsub_5);
+ D3 = TRI->getSubReg(SrcReg, ARM::dsub_7);
+ }
+
+ MIB.addReg(D0).addReg(D1);
+ if (NumRegs > 2)
+ MIB.addReg(D2);
+ if (NumRegs > 3)
+ MIB.addReg(D3);
+ MIB = AddDefaultPred(MIB);
+ TransferImpOps(MI, MIB, MIB);
+ if (SrcIsKill)
+ // Add an implicit kill for the super-reg.
+ (*MIB).addRegisterKilled(SrcReg, TRI, true);
+ MI.eraseFromParent();
+}
+
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -71,9 +205,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstr &MI = *MBBI;
MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+ bool ModifiedOp = true;
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
- default: break;
+ default:
+ ModifiedOp = false;
+ break;
+
case ARM::tLDRpci_pic:
case ARM::t2LDRpci_pic: {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
@@ -92,10 +230,10 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addOperand(MI.getOperand(2));
TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
- Modified = true;
break;
}
+ case ARM::MOVi32imm:
case ARM::t2MOVi32imm: {
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
@@ -104,9 +242,13 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
const MachineOperand &MO = MI.getOperand(1);
MachineInstrBuilder LO16, HI16;
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::MOVi32imm ?
+ ARM::MOVi16 : ARM::t2MOVi16),
DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(Opcode == ARM::MOVi32imm ?
+ ARM::MOVTi16 : ARM::t2MOVTi16))
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
.addReg(DstReg);
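+      // A sketch of the net effect: for an immediate like 0xAABBCCDD this
+      // emits "movw Dst, #0xCCDD" (low half, upper bits cleared) followed
+      // by "movt Dst, #0xAABB" (high half, low half preserved).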
@@ -128,7 +270,6 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
HI16.addImm(Pred).addReg(PredReg);
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
- Modified = true;
break;
}
@@ -155,9 +296,211 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
.addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
-    Modified = true;
+    break;
   }
+
+ case ARM::VLD1q8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q8, false, SingleSpc, 2); break;
+ case ARM::VLD1q16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q16, false, SingleSpc, 2); break;
+ case ARM::VLD1q32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q32, false, SingleSpc, 2); break;
+ case ARM::VLD1q64Pseudo:
+ ExpandVLD(MBBI, ARM::VLD1q64, false, SingleSpc, 2); break;
+ case ARM::VLD1q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q8, true, SingleSpc, 2); break;
+ case ARM::VLD1q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q16, true, SingleSpc, 2); break;
+ case ARM::VLD1q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q32, true, SingleSpc, 2); break;
+ case ARM::VLD1q64Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1q64, true, SingleSpc, 2); break;
+
+ case ARM::VLD2d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d8, false, SingleSpc, 2); break;
+ case ARM::VLD2d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d16, false, SingleSpc, 2); break;
+ case ARM::VLD2d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2d32, false, SingleSpc, 2); break;
+ case ARM::VLD2q8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q8, false, SingleSpc, 4); break;
+ case ARM::VLD2q16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q16, false, SingleSpc, 4); break;
+ case ARM::VLD2q32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD2q32, false, SingleSpc, 4); break;
+ case ARM::VLD2d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d8, true, SingleSpc, 2); break;
+ case ARM::VLD2d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d16, true, SingleSpc, 2); break;
+ case ARM::VLD2d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2d32, true, SingleSpc, 2); break;
+ case ARM::VLD2q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q8, true, SingleSpc, 4); break;
+ case ARM::VLD2q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q16, true, SingleSpc, 4); break;
+ case ARM::VLD2q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD2q32, true, SingleSpc, 4); break;
+
+ case ARM::VLD3d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d8, false, SingleSpc, 3); break;
+ case ARM::VLD3d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d16, false, SingleSpc, 3); break;
+ case ARM::VLD3d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD3d32, false, SingleSpc, 3); break;
+ case ARM::VLD1d64TPseudo:
+ ExpandVLD(MBBI, ARM::VLD1d64T, false, SingleSpc, 3); break;
+ case ARM::VLD3d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d8_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d16_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3d32_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD1d64TPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1d64T_UPD, true, SingleSpc, 3); break;
+ case ARM::VLD3q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VLD3q8oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q8_UPD, true, OddDblSpc, 3); break;
+ case ARM::VLD3q16oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q16_UPD, true, OddDblSpc, 3); break;
+ case ARM::VLD3q32oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD3q32_UPD, true, OddDblSpc, 3); break;
+
+ case ARM::VLD4d8Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d8, false, SingleSpc, 4); break;
+ case ARM::VLD4d16Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d16, false, SingleSpc, 4); break;
+ case ARM::VLD4d32Pseudo:
+ ExpandVLD(MBBI, ARM::VLD4d32, false, SingleSpc, 4); break;
+ case ARM::VLD1d64QPseudo:
+ ExpandVLD(MBBI, ARM::VLD1d64Q, false, SingleSpc, 4); break;
+ case ARM::VLD4d8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d8_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4d16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d16_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4d32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4d32_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD1d64QPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD1d64Q_UPD, true, SingleSpc, 4); break;
+ case ARM::VLD4q8Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q16Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q32Pseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VLD4q8oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q8_UPD, true, OddDblSpc, 4); break;
+ case ARM::VLD4q16oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q16_UPD, true, OddDblSpc, 4); break;
+ case ARM::VLD4q32oddPseudo_UPD:
+ ExpandVLD(MBBI, ARM::VLD4q32_UPD, true, OddDblSpc, 4); break;
+
+ case ARM::VST1q8Pseudo:
+ ExpandVST(MBBI, ARM::VST1q8, false, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo:
+ ExpandVST(MBBI, ARM::VST1q16, false, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo:
+ ExpandVST(MBBI, ARM::VST1q32, false, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo:
+ ExpandVST(MBBI, ARM::VST1q64, false, SingleSpc, 2); break;
+ case ARM::VST1q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST1q64Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1q64_UPD, true, SingleSpc, 2); break;
+
+ case ARM::VST2d8Pseudo:
+ ExpandVST(MBBI, ARM::VST2d8, false, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo:
+ ExpandVST(MBBI, ARM::VST2d16, false, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo:
+ ExpandVST(MBBI, ARM::VST2d32, false, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo:
+ ExpandVST(MBBI, ARM::VST2q8, false, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo:
+ ExpandVST(MBBI, ARM::VST2q16, false, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo:
+ ExpandVST(MBBI, ARM::VST2q32, false, SingleSpc, 4); break;
+ case ARM::VST2d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d8_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d16_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2d32_UPD, true, SingleSpc, 2); break;
+ case ARM::VST2q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q8_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q16_UPD, true, SingleSpc, 4); break;
+ case ARM::VST2q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST2q32_UPD, true, SingleSpc, 4); break;
+
+ case ARM::VST3d8Pseudo:
+ ExpandVST(MBBI, ARM::VST3d8, false, SingleSpc, 3); break;
+ case ARM::VST3d16Pseudo:
+ ExpandVST(MBBI, ARM::VST3d16, false, SingleSpc, 3); break;
+ case ARM::VST3d32Pseudo:
+ ExpandVST(MBBI, ARM::VST3d32, false, SingleSpc, 3); break;
+ case ARM::VST1d64TPseudo:
+ ExpandVST(MBBI, ARM::VST1d64T, false, SingleSpc, 3); break;
+ case ARM::VST3d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d8_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d16_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3d32_UPD, true, SingleSpc, 3); break;
+ case ARM::VST1d64TPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1d64T_UPD, true, SingleSpc, 3); break;
+ case ARM::VST3q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q8_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q16_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q32_UPD, true, EvenDblSpc, 3); break;
+ case ARM::VST3q8oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q8_UPD, true, OddDblSpc, 3); break;
+ case ARM::VST3q16oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q16_UPD, true, OddDblSpc, 3); break;
+ case ARM::VST3q32oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST3q32_UPD, true, OddDblSpc, 3); break;
+
+ case ARM::VST4d8Pseudo:
+ ExpandVST(MBBI, ARM::VST4d8, false, SingleSpc, 4); break;
+ case ARM::VST4d16Pseudo:
+ ExpandVST(MBBI, ARM::VST4d16, false, SingleSpc, 4); break;
+ case ARM::VST4d32Pseudo:
+ ExpandVST(MBBI, ARM::VST4d32, false, SingleSpc, 4); break;
+ case ARM::VST1d64QPseudo:
+ ExpandVST(MBBI, ARM::VST1d64Q, false, SingleSpc, 4); break;
+ case ARM::VST4d8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d8_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4d16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d16_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4d32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4d32_UPD, true, SingleSpc, 4); break;
+ case ARM::VST1d64QPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST1d64Q_UPD, true, SingleSpc, 4); break;
+ case ARM::VST4q8Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q8_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q16Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q16_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q32Pseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q32_UPD, true, EvenDblSpc, 4); break;
+ case ARM::VST4q8oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q8_UPD, true, OddDblSpc, 4); break;
+ case ARM::VST4q16oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q16_UPD, true, OddDblSpc, 4); break;
+ case ARM::VST4q32oddPseudo_UPD:
+ ExpandVST(MBBI, ARM::VST4q32_UPD, true, OddDblSpc, 4); break;
}
+
+ if (ModifiedOp)
+ Modified = true;
MBBI = NMBBI;
}
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 0000000000000..4892eae95833b
--- /dev/null
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,665 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMSubtarget.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARMFastISel("arm-fast-isel",
+ cl::desc("Turn on experimental ARM fast-isel support"),
+ cl::init(false), cl::Hidden);
+
+namespace {
+
+class ARMFastISel : public FastISel {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ const ARMFunctionInfo *AFI;
+
+ // Convenience variable to avoid checking all the time.
+ bool isThumb;
+
+ public:
+ explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+ : FastISel(funcInfo),
+ TM(funcInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+ isThumb = AFI->isThumbFunction();
+ }
+
+ // Code from FastISel.cpp.
+ virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm);
+ virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx);
+
+ // Backend specific FastISel code.
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+
+ #include "ARMGenFastISel.inc"
+
+ // Instruction selection routines.
+ virtual bool ARMSelectLoad(const Instruction *I);
+ virtual bool ARMSelectStore(const Instruction *I);
+ virtual bool ARMSelectBranch(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(const Type *Ty, EVT &VT);
+ bool isLoadTypeLegal(const Type *Ty, EVT &VT);
+ bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
+ bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
+ bool ARMLoadAlloca(const Instruction *I);
+ bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg);
+ bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
+ bool ARMMaterializeConstant(const ConstantInt *Val, unsigned &Reg);
+
+ bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+ const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+};
+
+} // end anonymous namespace
+
+// #include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets *CPSR to true if the instruction defines CPSR
+// instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.hasOptionalDef())
+ return false;
+
+ // Look to see if our OptionalDef is defining CPSR or CCR.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ if (MO.getReg() == ARM::CPSR)
+ *CPSR = true;
+ }
+ return true;
+}
+
+// If the machine instruction is predicable, go ahead and add the predicate
+// operands; if it needs default CC operands, add those as well.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+ MachineInstr *MI = &*MIB;
+
+ // Do we use a predicate?
+ if (TII.isPredicable(MI))
+ AddDefaultPred(MIB);
+
+  // Do we optionally set a predicate register? CPSR is true iff the optional
+  // def is CPSR; all other optional defs in ARM are the CCR register.
+ bool CPSR = false;
+ if (DefinesOptionalPredicate(MI, &CPSR)) {
+ if (CPSR)
+ AddDefaultT1CC(MIB);
+ else
+ AddDefaultCC(MIB);
+ }
+ return MIB;
+}
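+
+// Concretely (per the ARMBaseInstrInfo.h helpers): AddDefaultPred appends an
+// ARMCC::AL immediate plus a zero predicate register, AddDefaultT1CC appends
+// a CPSR def, and AddDefaultCC appends a zero (no-flags) cc_out operand.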
+
+unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ uint64_t Imm) {
+ unsigned ResultReg = createResultReg(RC);
+ const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+
+ if (II.getNumDefs() >= 1)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addImm(Imm));
+ else {
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(Imm));
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(II.ImplicitDefs[0]));
+ }
+ return ResultReg;
+}
+
+unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx));
+ return ResultReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT VT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!VT.isSimple()) return 0;
+
+ // TODO: This should be safe for fp because they're just bits from the
+ // Constant.
+ // TODO: Theoretically we could materialize fp constants with instructions
+ // from VFP3.
+
+ // MachineConstantPool wants an explicit alignment.
+ unsigned Align = TD.getPrefTypeAlignment(C->getType());
+ if (Align == 0) {
+ // TODO: Figure out if this is correct.
+ Align = TD.getTypeAllocSize(C->getType());
+ }
+ unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ // Different addressing modes between ARM/Thumb2 for constant pool loads.
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::t2LDRpci))
+ .addReg(DestReg).addConstantPoolIndex(Idx));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(ARM::LDRcp))
+ .addReg(DestReg).addConstantPoolIndex(Idx)
+ .addReg(0).addImm(0));
+
+ return DestReg;
+}
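+
+// In short, TargetMaterializeConstant drops the constant into the pool and
+// loads the entry pc-relatively: t2LDRpci in Thumb2 mode, LDRcp (with its
+// extra reg/imm addressing operands) in ARM mode.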
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) {
+ VT = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (VT == MVT::Other || !VT.isSimple()) return false;
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+  // If this is a type that can be sign- or zero-extended to a basic operation,
+  // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return true;
+
+ return false;
+}
+
+// Computes the Reg+Offset to get to an object.
+bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg,
+ int &Offset) {
+ // Some boilerplate from the X86 FastISel.
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
+ return false;
+
+ Opcode = I->getOpcode();
+ U = I;
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
+ switch (Opcode) {
+ default:
+ //errs() << "Failing Opcode is: " << *Op1 << "\n";
+ break;
+ case Instruction::Alloca: {
+ assert(false && "Alloca should have been handled earlier!");
+ return false;
+ }
+ }
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+ //errs() << "Failing GV is: " << GV << "\n";
+ (void)GV;
+ return false;
+ }
+
+ // Try to get this in a register if nothing else has worked.
+ Reg = getRegForValue(Obj);
+ if (Reg == 0) return false;
+
+  // Since the offset may be too large for the load instruction,
+  // get the reg+offset into a register.
+ // TODO: Verify the additions work, otherwise we'll need to add the
+ // offset instead of 0 to the instructions and do all sorts of operand
+ // munging.
+ // TODO: Optimize this somewhat.
+ if (Offset != 0) {
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ unsigned PredReg = 0;
+
+ if (!isThumb)
+ emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ Reg, Reg, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ else {
+ assert(AFI->isThumb2Function());
+ emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ Reg, Reg, Offset, Pred, PredReg,
+ static_cast<const ARMBaseInstrInfo&>(TII));
+ }
+ }
+
+ return true;
+}
+
+bool ARMFastISel::ARMLoadAlloca(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+
+ // Verify it's an alloca.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ unsigned ResultReg = createResultReg(RC);
+ TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
+ ResultReg, SI->second, RC,
+ TM.getRegisterInfo());
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg,
+ unsigned Reg, int Offset) {
+
+ assert(VT.isSimple() && "Non-simple types are invalid here!");
+ unsigned Opc;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ assert(false && "Trying to emit for an unhandled type!");
+ return false;
+ case MVT::i16:
+ Opc = isThumb ? ARM::tLDRH : ARM::LDRH;
+ VT = MVT::i32;
+ break;
+ case MVT::i8:
+ Opc = isThumb ? ARM::tLDRB : ARM::LDRB;
+ VT = MVT::i32;
+ break;
+ case MVT::i32:
+ Opc = isThumb ? ARM::tLDR : ARM::LDR;
+ break;
+ }
+
+ ResultReg = createResultReg(TLI.getRegClassFor(VT));
+
+ // TODO: Fix the Addressing modes so that these can share some code.
+ // Since this is a Thumb1 load this will work in Thumb1 or 2 mode.
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg).addImm(Offset).addReg(0));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg)
+ .addReg(Reg).addReg(0).addImm(Offset));
+
+ return true;
+}
+
+bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg) {
+ Value *Op1 = I->getOperand(1);
+
+ // Verify it's an alloca.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+ assert(SrcReg != 0 && "Nothing to store!");
+ TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt,
+ SrcReg, true /*isKill*/, SI->second, RC,
+ TM.getRegisterInfo());
+ return true;
+ }
+ }
+ return false;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg,
+ unsigned DstReg, int Offset) {
+ unsigned StrOpc;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: return false;
+ case MVT::i1:
+ case MVT::i8: StrOpc = isThumb ? ARM::tSTRB : ARM::STRB; break;
+ case MVT::i16: StrOpc = isThumb ? ARM::tSTRH : ARM::STRH; break;
+ case MVT::i32: StrOpc = isThumb ? ARM::tSTR : ARM::STR; break;
+ case MVT::f32:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRS;
+ break;
+ case MVT::f64:
+ if (!Subtarget->hasVFP2()) return false;
+ StrOpc = ARM::VSTRD;
+ break;
+ }
+
+ if (isThumb)
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc), SrcReg)
+ .addReg(DstReg).addImm(Offset).addReg(0));
+ else
+ AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(StrOpc), SrcReg)
+ .addReg(DstReg).addReg(0).addImm(Offset));
+
+ return true;
+}
+
+bool ARMFastISel::ARMSelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Yay type legalization
+ EVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+ // If we're an alloca we know we have a frame index and can emit the store
+ // quickly.
+ if (ARMStoreAlloca(I, SrcReg))
+ return true;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Reg = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset))
+ return false;
+
+  if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false;
+
+  return true;
+
+}
+
+bool ARMFastISel::ARMSelectLoad(const Instruction *I) {
+ // If we're an alloca we know we have a frame index and can emit the load
+ // directly in short order.
+ if (ARMLoadAlloca(I))
+ return true;
+
+ // Verify we have a legal type before going any further.
+ EVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // Our register and offset with innocuous defaults.
+ unsigned Reg = 0;
+ int Offset = 0;
+
+ // See if we can handle this as Reg + Offset
+ if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset))
+ return false;
+
+ unsigned ResultReg;
+ if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+bool ARMFastISel::ARMSelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // Simple branch support.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (CondReg == 0) return false;
+
+  // Test the condition against zero and branch to TBB when it is nonzero.
+  unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
+  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+                  .addReg(CondReg).addImm(0));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+ .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+}
+
+// TODO: SoftFP support.
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
+ // No Thumb-1 for now.
+ if (isThumb && !AFI->isThumb2Function()) return false;
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return ARMSelectLoad(I);
+ case Instruction::Store:
+ return ARMSelectStore(I);
+ case Instruction::Br:
+ return ARMSelectBranch(I);
+ default: break;
+ }
+ return false;
+}
+
+namespace llvm {
+ llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
+ if (EnableARMFastISel) return new ARMFastISel(funcInfo);
+ return 0;
+ }
+}
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 0000000000000..85b0c6c248d02
--- /dev/null
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,212 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals that were merged can be addressed using offsets from the same base
+// pointer (there is no need for a separate base pointer for each global).
+// Such a transformation can significantly reduce register pressure when many
+// globals are involved.
+//
+// For example, consider code that touches several global variables at once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+// foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of all 3 arrays must be kept in registers, so this
+// code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+// int foo[N];
+// int bar[N];
+// int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+// merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+namespace {
+ class LLVM_LIBRARY_VISIBILITY ARMGlobalMerge : public FunctionPass {
+ /// TLI - Keep a pointer of a TargetLowering to consult for determining
+ /// target type sizes.
+ const TargetLowering *TLI;
+
+ bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool) const;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit ARMGlobalMerge(const TargetLowering *tli)
+ : FunctionPass(ID), TLI(tli) {}
+
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function& F);
+
+ const char *getPassName() const {
+ return "Merge internal globals";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+ struct GlobalCmp {
+ const TargetData *TD;
+
+ GlobalCmp(const TargetData *td):
+ TD(td) { }
+
+ bool operator() (const GlobalVariable* GV1,
+ const GlobalVariable* GV2) {
+ const Type* Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+ const Type* Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+ return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+ }
+ };
+ };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+ Module &M, bool isConst) const {
+ const TargetData *TD = TLI->getTargetData();
+
+ // FIXME: Infer the maximum possible offset depending on the actual users
+ // (these max offsets are different for the users inside Thumb or ARM
+ // functions)
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+ // FIXME: Find better heuristics
+ std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+ const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+ for (size_t i = 0, e = Globals.size(); i != e; ) {
+ size_t j = 0;
+ uint64_t MergedSize = 0;
+ std::vector<const Type*> Tys;
+ std::vector<Constant*> Inits;
+ for (j = i; MergedSize < MaxOffset && j != e; ++j) {
+ const Type* Ty = Globals[j]->getType()->getElementType();
+ Tys.push_back(Ty);
+ Inits.push_back(Globals[j]->getInitializer());
+ MergedSize += TD->getTypeAllocSize(Ty);
+ }
+
+ StructType* MergedTy = StructType::get(M.getContext(), Tys);
+ Constant* MergedInit = ConstantStruct::get(MergedTy, Inits);
+ GlobalVariable* MergedGV = new GlobalVariable(M, MergedTy, isConst,
+ GlobalValue::InternalLinkage,
+ MergedInit, "merged");
+ for (size_t k = i; k < j; ++k) {
+ SmallVector<Constant*, 2> Idx;
+ Idx.push_back(ConstantInt::get(Int32Ty, 0));
+ Idx.push_back(ConstantInt::get(Int32Ty, k-i));
+
+ Constant* GEP =
+ ConstantExpr::getInBoundsGetElementPtr(MergedGV,
+ &Idx[0], Idx.size());
+
+ Globals[k]->replaceAllUsesWith(GEP);
+ Globals[k]->eraseFromParent();
+ }
+ i = j;
+ }
+
+ return true;
+}
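+
+// For the foo/bar/baz example in the header comment, the result is roughly
+// (a sketch; names and layout are illustrative):
+//
+//   @merged = internal global { [N x i32], [N x i32], [N x i32] } { ... }
+//
+// with each use of, say, bar rewritten to the constant expression
+//   getelementptr inbounds (@merged, i32 0, i32 1)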
+
+
+bool ARMGlobalMerge::doInitialization(Module& M) {
+ SmallVector<GlobalVariable*, 16> Globals, ConstGlobals;
+ const TargetData *TD = TLI->getTargetData();
+ unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+ bool Changed = false;
+
+  // Collect candidate globals, separating constants from non-constants.
+ for (Module::global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ // Merge is safe for "normal" internal globals only
+ if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+ continue;
+
+ // Ignore fancy-aligned globals for now.
+ if (I->getAlignment() != 0)
+ continue;
+
+ // Ignore all 'special' globals.
+ if (I->getName().startswith("llvm.") ||
+ I->getName().startswith(".llvm."))
+ continue;
+
+ if (TD->getTypeAllocSize(I->getType()) < MaxOffset) {
+ if (I->isConstant())
+ ConstGlobals.push_back(I);
+ else
+ Globals.push_back(I);
+ }
+ }
+
+ if (Globals.size() > 1)
+ Changed |= doMerge(Globals, M, false);
+  // FIXME: This currently breaks the EH processing due to the way the
+  // typeinfo detection works. We might want to detect the TIs and ignore
+  // them in the future.
+
+ // if (ConstGlobals.size() > 1)
+ // Changed |= doMerge(ConstGlobals, M, true);
+
+ return Changed;
+}
+
+bool ARMGlobalMerge::runOnFunction(Function& F) {
+ return false;
+}
+
+FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) {
+ return new ARMGlobalMerge(tli);
+}
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index c84d3ff813248..51a30c158dd1b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -36,6 +36,11 @@
using namespace llvm;
+static cl::opt<bool>
+DisableShifterOp("disable-shifter-op", cl::Hidden,
+ cl::desc("Disable isel of shifter-op"),
+ cl::init(false));
+
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -113,6 +118,16 @@ public:
bool SelectT2AddrModeSoReg(SDNode *Op, SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
+ inline bool Pred_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
+ }
+
+ inline bool Pred_t2_so_imm(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ return ARM_AM::getT2SOImmVal(N->getZExtValue()) != -1;
+ }
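+
+  // As a rough guide: a so_imm is an 8-bit value rotated right by an even
+  // amount (e.g. 0xFF0 = 0xFF ror 28 is encodable, 0x101 is not), and the
+  // Thumb-2 form additionally allows replicated-byte patterns; the getters
+  // return -1 when the value is not encodable.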
+
// Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"
@@ -220,6 +235,9 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
@@ -463,7 +481,7 @@ bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectAddrMode4(SDNode *Op, SDValue N,
SDValue &Addr, SDValue &Mode) {
Addr = N;
- Mode = CurDAG->getTargetConstant(0, MVT::i32);
+ Mode = CurDAG->getTargetConstant(ARM_AM::getAM4ModeImm(ARM_AM::ia), MVT::i32);
return true;
}
@@ -666,6 +684,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDNode *Op, SDValue N,
SDValue &BaseReg,
SDValue &Opc) {
+ if (DisableShifterOp)
+ return false;
+
ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
// Don't match base register only case. That is matched to a separate
@@ -1090,110 +1111,79 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
break;
}
+ EVT ResTy;
+ if (NumVecs == 1)
+ ResTy = VT;
+ else {
+ unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+ if (!is64BitVector)
+ ResTyElts *= 2;
+ ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+ }
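+  // e.g. a VLD3 of 128-bit vectors rounds ResTyElts up to 4 and doubles it
+  // to 8, giving ResTy = v8i64: a single value covering the QQQQ super-reg.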
+
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue SuperReg;
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(NumVecs, VT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- if (NumVecs < 2)
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
return VLd;
- SDValue RegSeq;
- SDValue V0 = SDValue(VLd, 0);
- SDValue V1 = SDValue(VLd, 1);
-
- // Form a REG_SEQUENCE to force register allocation.
- if (NumVecs == 2)
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- else {
- SDValue V2 = SDValue(VLd, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLd, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
+ SuperReg = SDValue(VLd, 0);
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
- dl, VT, RegSeq);
+ dl, VT, SuperReg);
ReplaceUses(SDValue(N, Vec), D);
}
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
return NULL;
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
// Quad registers are directly supported for VLD1 and VLD2,
// loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(2 * NumVecs, RegVT);
- ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- Chain = SDValue(VLd, 2 * NumVecs);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTy, MVT::Other, Ops, 5);
+ if (NumVecs == 1)
+ return VLd;
+
+ SuperReg = SDValue(VLd, 0);
+ Chain = SDValue(VLd, 1);
- // Combine the even and odd subregs to produce the result.
- if (NumVecs == 1) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
- ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
- } else {
- SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
- SDValue(VLd, 0), SDValue(VLd, 1),
- SDValue(VLd, 2), SDValue(VLd, 3)), 0);
- SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
- SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
- ReplaceUses(SDValue(N, 0), Q0);
- ReplaceUses(SDValue(N, 1), Q1);
- }
} else {
// Otherwise, quad registers are loaded with two separate instructions,
// where one loads the even registers and the other loads the odd registers.
-
- std::vector<EVT> ResTys(NumVecs, RegVT);
- ResTys.push_back(MemAddr.getValueType());
- ResTys.push_back(MVT::Other);
+ EVT AddrTy = MemAddr.getValueType();
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6);
- Chain = SDValue(VLdA, NumVecs+1);
+ SDValue ImplDef =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+ const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+ SDNode *VLdA =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsA, 7);
+ Chain = SDValue(VLdA, 2);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs),
- Align, Reg0, Pred, Reg0, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
- Chain = SDValue(VLdB, NumVecs+1);
-
- SDValue V0 = SDValue(VLdA, 0);
- SDValue V1 = SDValue(VLdB, 0);
- SDValue V2 = SDValue(VLdA, 1);
- SDValue V3 = SDValue(VLdB, 1);
- SDValue V4 = SDValue(VLdA, 2);
- SDValue V5 = SDValue(VLdB, 2);
- SDValue V6 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdA, 3);
- SDValue V7 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
- : SDValue(VLdB, 3);
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
- V4, V5, V6, V7), 0);
-
- // Extract out the 3 / 4 Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, RegSeq);
- ReplaceUses(SDValue(N, Vec), Q);
- }
+ const SDValue OpsB[] = { SDValue(VLdA, 1), Align, Reg0, SDValue(VLdA, 0),
+ Pred, Reg0, Chain };
+ SDNode *VLdB =
+ CurDAG->getMachineNode(Opc, dl, ResTy, AddrTy, MVT::Other, OpsB, 7);
+ SuperReg = SDValue(VLdB, 0);
+ Chain = SDValue(VLdB, 2);
+ }
+
+ // Extract out the Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, SuperReg);
+ ReplaceUses(SDValue(N, Vec), Q);
}
ReplaceUses(SDValue(N, NumVecs), Chain);
return NULL;
@@ -1235,12 +1225,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
SDValue Pred = getAL(CurDAG);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SmallVector<SDValue, 10> Ops;
+ SmallVector<SDValue, 7> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
if (is64BitVector) {
- if (NumVecs >= 2) {
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@@ -1257,111 +1249,61 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
: N->getOperand(3+3);
RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
- RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
- RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
- RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
- RegSeq));
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ Ops.push_back(RegSeq);
}
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = DOpcodes[OpcodeIndex];
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
- EVT RegVT = GetNEONSubregVT(VT);
if (NumVecs <= 2) {
- // Quad registers are directly supported for VST1 and VST2,
- // storing pairs of D regs.
+ // Quad registers are directly supported for VST1 and VST2.
unsigned Opc = QOpcodes0[OpcodeIndex];
- if (NumVecs == 2) {
- // First extract the pair of Q registers.
+ if (NumVecs == 1) {
+ Ops.push_back(N->getOperand(3));
+ } else {
+ // Form a QQ register.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
-
- // Form a QQ register.
- SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
- QQ));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
- QQ));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
- } else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3)));
- }
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
- 5 + 2 * NumVecs);
+ Ops.push_back(SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0));
}
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 6);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
// Form the QQQQ REG_SEQUENCE.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3));
- V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(3+3);
+ SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
// Store the even D registers.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
Ops.push_back(Reg0); // post-access address offset
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
- RegVT, RegSeq));
+ Ops.push_back(RegSeq);
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStA, 1);
// Store the odd D registers.
Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
- RegVT, RegSeq);
- Ops[NumVecs+5] = Chain;
+ Ops[6] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
+ MVT::Other, Ops.data(), 7);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1675,7 +1617,7 @@ SelectT2CMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
if (!T)
return 0;
- if (Predicate_t2_so_imm(TrueVal.getNode())) {
+ if (Pred_t2_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
@@ -1692,7 +1634,7 @@ SelectARMCMOVSoImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal,
if (!T)
return 0;
- if (Predicate_so_imm(TrueVal.getNode())) {
+ if (Pred_so_imm(TrueVal.getNode())) {
SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32);
SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32);
SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag };
@@ -1740,7 +1682,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
}
// Pattern: (ARMcmov:i32 GPR:i32:$false,
- // (imm:i32)<<P:Predicate_so_imm>>:$true,
+ // (imm:i32)<<P:Pred_so_imm>>:$true,
// (imm:i32):$cc)
// Emits: (MOVCCi:i32 GPR:i32:$false,
// (so_imm:i32 (imm:i32):$true), (imm:i32):$cc)
@@ -2013,43 +1955,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
-
- // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl,
- MVT::v2f64, MVT::Other, Ops, 5);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
- // Other cases are autogenerated.
- break;
- }
- case ISD::STORE: {
- // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value.
- if (Subtarget->hasVFP2() &&
- N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) {
- SDValue Chain = N->getOperand(0);
- SDValue AM5Opc =
- CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
- SDValue Pred = getAL(CurDAG);
- SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
- SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
- AM5Opc, Pred, PredReg, Chain };
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
- SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
- cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
- return Ret;
- }
// Other cases are autogenerated.
break;
}
@@ -2206,39 +2111,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld1: {
unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
ARM::VLD1d32, ARM::VLD1d64 };
- unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
- ARM::VLD1q32, ARM::VLD1q64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo,
+ ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo };
return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld2: {
- unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD1q64 };
- unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
+ unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo,
+ ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
+ ARM::VLD2q32Pseudo };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
- unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
- ARM::VLD3d32, ARM::VLD1d64T };
- unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
- ARM::VLD3q16_UPD,
- ARM::VLD3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD,
- ARM::VLD3q16odd_UPD,
- ARM::VLD3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo,
+ ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
+ ARM::VLD3q16Pseudo_UPD,
+ ARM::VLD3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
+ ARM::VLD3q16oddPseudo_UPD,
+ ARM::VLD3q32oddPseudo_UPD };
return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vld4: {
- unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
- ARM::VLD4d32, ARM::VLD1d64Q };
- unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
- ARM::VLD4q16_UPD,
- ARM::VLD4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD,
- ARM::VLD4q16odd_UPD,
- ARM::VLD4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo,
+ ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
+ ARM::VLD4q16Pseudo_UPD,
+ ARM::VLD4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
+ ARM::VLD4q16oddPseudo_UPD,
+ ARM::VLD4q32oddPseudo_UPD };
return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
@@ -2266,39 +2172,40 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst1: {
unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
ARM::VST1d32, ARM::VST1d64 };
- unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
- ARM::VST1q32, ARM::VST1q64 };
+ unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo,
+ ARM::VST1q32Pseudo, ARM::VST1q64Pseudo };
return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst2: {
- unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST1q64 };
- unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
+ unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo,
+ ARM::VST2d32Pseudo, ARM::VST1q64Pseudo };
+ unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
+ ARM::VST2q32Pseudo };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
- unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
- ARM::VST3d32, ARM::VST1d64T };
- unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
- ARM::VST3q16_UPD,
- ARM::VST3q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD,
- ARM::VST3q16odd_UPD,
- ARM::VST3q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo,
+ ARM::VST3d32Pseudo, ARM::VST1d64TPseudo };
+ unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
+ ARM::VST3q16Pseudo_UPD,
+ ARM::VST3q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
+ ARM::VST3q16oddPseudo_UPD,
+ ARM::VST3q32oddPseudo_UPD };
return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1);
}
case Intrinsic::arm_neon_vst4: {
- unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
- ARM::VST4d32, ARM::VST1d64Q };
- unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
- ARM::VST4q16_UPD,
- ARM::VST4q32_UPD };
- unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD,
- ARM::VST4q16odd_UPD,
- ARM::VST4q32odd_UPD };
+ unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo,
+ ARM::VST4d32Pseudo, ARM::VST1d64QPseudo };
+ unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
+ ARM::VST4q16Pseudo_UPD,
+ ARM::VST4q32Pseudo_UPD };
+ unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
+ ARM::VST4q16oddPseudo_UPD,
+ ARM::VST4q32oddPseudo_UPD };
return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0091df753eb78..ce4a2c90689c2 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -55,7 +55,14 @@ STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
cl::desc("Generate tail calls (TEMPORARY OPTION)."),
- cl::init(true));
+ cl::init(false));
+
+// This option should go away when Machine LICM is smart enough to hoist a
+// reg-to-reg VDUP.
+static cl::opt<bool>
+EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
+ cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
+ cl::init(false));
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
@@ -122,7 +129,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
}
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
// Promote all bit-wise operations.
if (VT.isInteger() && VT != PromotedBitwiseVT) {
@@ -166,6 +176,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ RegInfo = TM.getRegisterInfo();
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
@@ -264,7 +275,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
- addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
+ if (!Subtarget->isFPOnlySP())
+ addRegisterClass(MVT::f64, ARM::DPRRegisterClass);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
}
@@ -310,9 +322,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
+ setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
+
// Neon does not support some operations on v1i64 and v2i64 types.
setOperationAction(ISD::MUL, MVT::v1i64, Expand);
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ // Custom handling for some quad-vector types to detect VMULL.
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i64, Custom);
setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
@@ -410,12 +427,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
- // use the default expansion.
- bool canHandleAtomics =
- (Subtarget->hasV7Ops() ||
- (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
- if (canHandleAtomics) {
+ // Thumb1 on ARMv6 (unless the CPU supports dmb / dsb) and anything earlier
+ // use the default expansion.
+ if (Subtarget->hasDataBarrier() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
// membarrier needs custom lowering; the rest are legal and handled
// normally.
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
@@ -466,10 +481,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
+ if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
// Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
// iff target supports vfp2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+ }
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -481,9 +498,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
setOperationAction(ISD::SETCC, MVT::f64, Expand);
- setOperationAction(ISD::SELECT, MVT::i32, Expand);
- setOperationAction(ISD::SELECT, MVT::f32, Expand);
- setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
@@ -530,6 +547,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
+ if (Subtarget->hasV6T2Ops())
+ setTargetDAGCombine(ISD::OR);
+
setStackPointerRegisterToSaveRestore(ARM::SP);
if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
@@ -547,6 +567,37 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
benefitFromCodePlacementOpt = true;
}
+std::pair<const TargetRegisterClass*, uint8_t>
+ARMTargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ // Use DPR as representative register class for all floating point
+ // and vector types. Since there are 32 SPR registers and 32 DPR registers,
+ // the cost is 1 for both f32 and f64.
+ case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
+ RRC = ARM::DPRRegisterClass;
+ break;
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 2;
+ break;
+ case MVT::v4i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 4;
+ break;
+ case MVT::v8i64:
+ RRC = ARM::DPRRegisterClass;
+ Cost = 8;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return 0;
@@ -561,6 +612,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
+ case ARMISD::AND: return "ARMISD::AND";
case ARMISD::CMP: return "ARMISD::CMP";
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
@@ -635,9 +687,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::VMULLs: return "ARMISD::VMULLs";
+ case ARMISD::VMULLu: return "ARMISD::VMULLu";
case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
+ case ARMISD::BFI: return "ARMISD::BFI";
}
}
@@ -656,11 +711,23 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
return TargetLowering::getRegClassFor(VT);
}
+// Create a fast isel object.
+FastISel *
+ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return ARM::createFastISel(funcInfo);
+}
+
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}
+/// getMaximalGlobalOffset - Returns the maximal possible offset which can
+/// be used for loads / stores from the global.
+unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
+ return (Subtarget->isThumb1Only() ? 127 : 4095);
+}
+
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
unsigned NumVals = N->getNumValues();
if (!NumVals)
@@ -688,6 +755,24 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::RegPressure;
}
+unsigned
+ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case ARM::tGPRRegClassID:
+ return RegInfo->hasFP(MF) ? 4 : 5;
+ case ARM::GPRRegClassID: {
+ unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
+ return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
+ }
+ case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
+ case ARM::DPRRegClassID:
+ return 32 - 10;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
@@ -793,8 +878,9 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
CCState &State, bool CanFail) {
static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
+ static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };
- unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
+ unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
// For the 2nd half of a v2f64, do not just fail.
if (CanFail)
@@ -812,6 +898,10 @@ static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
if (HiRegList[i] == Reg)
break;
+ unsigned T = State.AllocateReg(LoRegList[i]);
+ (void)T;
+ assert(T == LoRegList[i] && "Could not allocate register");
+
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
LocVT, LocInfo));
@@ -1624,6 +1714,10 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ return MachineJumpTableInfo::EK_Inline;
+}
+
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -1917,17 +2011,19 @@ static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- // v6 and v7 can both handle barriers directly, but need handled a bit
- // differently. Thumb1 and pre-v6 ARM mode use a libcall instead and should
+ // Subtargets with dmb and dsb instructions can handle barriers directly.
+ // Some ARMv6 CPUs can support them with the help of the mcr instruction.
+ // Thumb1 and pre-v6 ARM mode use a libcall instead and should
// never get here.
unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
- if (Subtarget->hasV7Ops())
+ if (Subtarget->hasDataBarrier())
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
- else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
+ else {
+ assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() &&
+ "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
DAG.getConstant(0, MVT::i32));
- assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
- return SDValue();
+ }
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -1945,54 +2041,6 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
}
SDValue
-ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
- SDNode *Node = Op.getNode();
- DebugLoc dl = Node->getDebugLoc();
- EVT VT = Node->getValueType(0);
- SDValue Chain = Op.getOperand(0);
- SDValue Size = Op.getOperand(1);
- SDValue Align = Op.getOperand(2);
-
- // Chain the dynamic stack allocation so that it doesn't modify the stack
- // pointer when other instructions are using the stack.
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
-
- unsigned AlignVal = cast<ConstantSDNode>(Align)->getZExtValue();
- unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment();
- if (AlignVal > StackAlign)
- // Do this now since selection pass cannot introduce new target
- // independent node.
- Align = DAG.getConstant(-(uint64_t)AlignVal, VT);
-
- // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up
- // using a "add r, sp, r" instead. Negate the size now so we don't have to
- // do even more horrible hack later.
- MachineFunction &MF = DAG.getMachineFunction();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- if (AFI->isThumb1OnlyFunction()) {
- bool Negate = true;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Size);
- if (C) {
- uint32_t Val = C->getZExtValue();
- if (Val <= 508 && ((Val & 3) == 0))
- Negate = false;
- }
- if (Negate)
- Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size);
- }
-
- SDVTList VTList = DAG.getVTList(VT, MVT::Other);
- SDValue Ops1[] = { Chain, Size, Align };
- SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3);
- Chain = Res.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
- DAG.getIntPtrConstant(0, true), SDValue());
- SDValue Ops2[] = { Res, Chain };
- return DAG.getMergeValues(Ops2, 2, dl);
-}
-
-SDValue
ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) const {
@@ -2229,28 +2277,28 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
default: break;
case ISD::SETLT:
case ISD::SETGE:
- if (isLegalICmpImmediate(C-1)) {
+ if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if (C > 0 && isLegalICmpImmediate(C-1)) {
+ if (C != 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if (isLegalICmpImmediate(C+1)) {
+ if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
+ if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
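
A minimal standalone sketch of why these wrap-around guards are needed (plain C++, not part of the patch; it only mirrors the arithmetic behind the C != 0x80000000 and C != 0xffffffff checks above):

#include <cassert>
#include <cstdint>

int main() {
  // (x < C) == (x <= C-1) is the rewrite getARMCmp performs; it holds only
  // when C-1 does not wrap.
  int32_t C = 100;
  for (int32_t x : {-1, 99, 100, 101})
    assert((x < C) == (x <= C - 1));

  // With C = 0x80000000 (INT32_MIN), C-1 wraps to INT32_MAX (computed in
  // uint32_t here to avoid signed overflow), and the rewrite would flip the
  // result -- hence the guard.
  int32_t Wrapped = static_cast<int32_t>(static_cast<uint32_t>(INT32_MIN) - 1u);
  assert(Wrapped == INT32_MAX);
  assert(!(0 < INT32_MIN) && (0 <= Wrapped));
  return 0;
}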
@@ -2287,6 +2335,52 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Cond = Op.getOperand(0);
+ SDValue SelectTrue = Op.getOperand(1);
+ SDValue SelectFalse = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Convert:
+ //
+ // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+ // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+ //
+ if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+ const ConstantSDNode *CMOVTrue =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+ const ConstantSDNode *CMOVFalse =
+ dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+ if (CMOVTrue && CMOVFalse) {
+ unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+ unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+ SDValue True;
+ SDValue False;
+ if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+ True = SelectTrue;
+ False = SelectFalse;
+ } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+ True = SelectFalse;
+ False = SelectTrue;
+ }
+
+ if (True.getNode() && False.getNode()) {
+ EVT VT = Cond.getValueType();
+ SDValue ARMcc = Cond.getOperand(2);
+ SDValue CCR = Cond.getOperand(3);
+ SDValue Cmp = Cond.getOperand(4);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+ }
+ }
+ }
+
+ return DAG.getSelectCC(dl, Cond,
+ DAG.getConstant(0, Cond.getValueType()),
+ SelectTrue, SelectFalse, ISD::SETNE);
+}
+
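The fold implemented above, checked in scalar form (a sketch for illustration, not the DAG code):

#include <cassert>

int main() {
  for (bool cond : {false, true}) {
    int t = 10, f = 20;
    // (select (cmov 1, 0, cond), t, f): select on a materialized boolean...
    int boolVal = cond ? 1 : 0;
    int viaBool = (boolVal != 0) ? t : f;
    // ...is the same as conditionally moving t/f directly: (cmov t, f, cond).
    assert(viaBool == (cond ? t : f));
    // With the constants swapped, (cmov 0, 1, cond), the arms swap instead.
    int inverted = ((cond ? 0 : 1) != 0) ? t : f;
    assert(inverted == (cond ? f : t));
  }
  return 0;
}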
SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
@@ -2403,8 +2497,9 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
bool SeenZero = false;
if (canChangeToInt(LHS, SeenZero, Subtarget) &&
canChangeToInt(RHS, SeenZero, Subtarget) &&
- // If one of the operand is zero, it's safe to ignore the NaN case.
- (FiniteOnlyFPMath() || SeenZero)) {
+ // If one of the operands is zero, it's safe to ignore the NaN case since
+ // we only care about equality comparisons.
+ (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
// If unsafe fp math optimization is enabled and there are no other uses of
// the CMP operands, and the condition code is EQ or NE, we can optimize it
// to an integer comparison.
@@ -2587,7 +2682,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
// Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass);
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}
@@ -2730,6 +2825,24 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
+SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ // The rounding mode is in bits 23:22 of the FPSCR.
+ // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
+ // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
+ // so that the shift + and get folded into a bitfield extract.
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+ DAG.getConstant(Intrinsic::arm_get_fpscr,
+ MVT::i32));
+ SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+ DAG.getConstant(1U << 22, MVT::i32));
+ SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+ DAG.getConstant(22, MVT::i32));
+ return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+ DAG.getConstant(3, MVT::i32));
+}
+
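A standalone sketch of that arithmetic (assuming a raw 32-bit FPSCR value; the helper name is illustrative):

#include <cassert>
#include <cstdint>

// FPSCR bits 23:22: 0 = nearest, 1 = toward +inf, 2 = toward -inf, 3 = zero.
// FLT_ROUNDS:       1 = nearest, 2 = toward +inf, 3 = toward -inf, 0 = zero.
static int fltRoundsFromFPSCR(uint32_t FPSCR) {
  // Adding 1 << 22 before shifting implements the rotation 0->1, 1->2,
  // 2->3, 3->0 described above.
  return static_cast<int>(((FPSCR + (1u << 22)) >> 22) & 3);
}

int main() {
  assert(fltRoundsFromFPSCR(0u << 22) == 1); // round to nearest
  assert(fltRoundsFromFPSCR(1u << 22) == 2); // round toward +inf
  assert(fltRoundsFromFPSCR(2u << 22) == 3); // round toward -inf
  assert(fltRoundsFromFPSCR(3u << 22) == 0); // round toward zero
  return 0;
}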
static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
EVT VT = N->getValueType(0);
@@ -3046,6 +3159,11 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
bool &ReverseVEXT, unsigned &Imm) {
unsigned NumElts = VT.getVectorNumElements();
ReverseVEXT = false;
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
Imm = M[0];
// If this is a VEXT shuffle, the immediate value is the index of the first
@@ -3061,6 +3179,7 @@ static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
ReverseVEXT = true;
}
+ if (M[i] < 0) continue; // ignore UNDEF indices
if (ExpectedElt != static_cast<unsigned>(M[i]))
return false;
}
@@ -3086,13 +3205,16 @@ static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
for (unsigned i = 0; i < NumElts; ++i) {
- if ((unsigned) M[i] !=
- (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
return false;
}
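
A minimal standalone rendering of this check with the UNDEF (-1) tolerance, plus a few sanity tests (a sketch under v4i16/vrev64 parameters; not the in-tree code):

#include <cassert>
#include <vector>

static bool isVREVMask(const std::vector<int> &M, unsigned EltSz,
                       unsigned BlockSize) {
  unsigned NumElts = M.size();
  unsigned BlockElts = M[0] + 1;
  if (M[0] < 0)                     // first index UNDEF: be optimistic
    BlockElts = BlockSize / EltSz;
  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
    return false;
  for (unsigned i = 0; i < NumElts; ++i) {
    if (M[i] < 0) continue;         // ignore UNDEF indices
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }
  return true;
}

int main() {
  assert(isVREVMask({3, 2, 1, 0}, 16, 64));   // vrev64.16 mask on v4i16
  assert(isVREVMask({-1, 2, 1, 0}, 16, 64));  // UNDEF first lane still matches
  assert(!isVREVMask({0, 1, 2, 3}, 16, 64));  // identity is not a VREV
  return 0;
}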
@@ -3108,8 +3230,8 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + NumElts + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
return false;
}
return true;
@@ -3127,8 +3249,8 @@ static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i < NumElts; i += 2) {
- if ((unsigned) M[i] != i + WhichResult ||
- (unsigned) M[i+1] != i + WhichResult)
+ if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
return false;
}
return true;
@@ -3143,6 +3265,7 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned NumElts = VT.getVectorNumElements();
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
if ((unsigned) M[i] != 2 * i + WhichResult)
return false;
}
@@ -3168,7 +3291,8 @@ static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
for (unsigned j = 0; j != 2; ++j) {
unsigned Idx = WhichResult;
for (unsigned i = 0; i != Half; ++i) {
- if ((unsigned) M[i + j * Half] != Idx)
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned) MIdx != Idx)
return false;
Idx += 2;
}
@@ -3191,8 +3315,8 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx + NumElts)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
return false;
Idx += 1;
}
@@ -3217,8 +3341,8 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((unsigned) M[i] != Idx ||
- (unsigned) M[i+1] != Idx)
+ if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+ (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
return false;
Idx += 1;
}
@@ -3230,9 +3354,30 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction). Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+ const ARMSubtarget *ST, DebugLoc dl) {
+ uint64_t Val;
+ if (!isa<ConstantSDNode>(N))
+ return SDValue();
+ Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+ if (ST->isThumb1Only()) {
+ if (Val <= 255 || ~Val <= 255)
+ return DAG.getConstant(Val, MVT::i32);
+ } else {
+ if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+ return DAG.getConstant(Val, MVT::i32);
+ }
+ return SDValue();
+}
+
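A rough scalar model of the Thumb1 branch of this test (the ARM-mode SOImm check is omitted; the helper name is illustrative):

#include <cassert>
#include <cstdint>

// A constant is a single Thumb1 instruction if it fits in an 8-bit
// immediate, or if its bitwise complement does.
static bool thumb1SingleInstrConstant(uint32_t Val) {
  return Val <= 255 || ~Val <= 255;
}

int main() {
  assert(thumb1SingleInstrConstant(200));          // small immediate
  assert(thumb1SingleInstrConstant(0xFFFFFF05u));  // complement is 0xFA
  assert(!thumb1SingleInstrConstant(0x12345678u)); // needs a constant pool
  return 0;
}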
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
-static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
@@ -3292,15 +3437,41 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (isOnlyLowElement)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
- // If all elements are constants, fall back to the default expansion, which
- // will generate a load from the constant pool.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ if (EnableARMVDUPsplat) {
+ // Use VDUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue && EltSize <= 32) {
+ if (!isConstant)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
+ Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0],
+ NumElts);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ LowerBUILD_VECTOR(Val, DAG, ST));
+ }
+ SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+ if (Val.getNode())
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+ }
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
if (isConstant)
return SDValue();
- // Use VDUP for non-constant splats.
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (usesOnlyOneValue && EltSize <= 32)
- return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ if (!EnableARMVDUPsplat) {
+ // Use VDUP for non-constant splats.
+ if (usesOnlyOneValue && EltSize <= 32)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+ }
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
@@ -3585,6 +3756,51 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val);
}
+/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or
+/// an extending load, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+ return N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+ LD->getBasePtr(), LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->isVolatile(),
+ LD->isNonTemporal(), LD->getAlignment());
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+ // Multiplications are only custom-lowered for 128-bit vectors so that
+ // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ EVT VT = Op.getValueType();
+ assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+ SDNode *N0 = Op.getOperand(0).getNode();
+ SDNode *N1 = Op.getOperand(1).getNode();
+ unsigned NewOpc = 0;
+ if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLs;
+ } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) &&
+ (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) {
+ NewOpc = ARMISD::VMULLu;
+ } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) {
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ } else {
+ // Other vector multiplications are legal.
+ return Op;
+ }
+
+ // Legalize to a VMULL instruction.
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Op0 = SkipExtension(N0, DAG);
+ SDValue Op1 = SkipExtension(N1, DAG);
+
+ assert(Op0.getValueType().is64BitVector() &&
+ Op1.getValueType().is64BitVector() &&
+ "unexpected types for extended operands to VMULL");
+ return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+}
+
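The kind of source this pattern is aimed at, as a plain C++ sketch: a widening multiply whose operands are sign-extended from 16 to 32 bits. With the lowering above, the vectorized form can become a single vmull.s16 instead of two extends plus a full 128-bit multiply.

#include <cstdint>

// Each product is computed at double width; the multiply's operands are
// sign-extended loads, exactly the shape SkipExtension()/LowerMUL look for.
void widening_mul(const int16_t *a, const int16_t *b, int32_t *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = static_cast<int32_t>(a[i]) * static_cast<int32_t>(b[i]);
}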
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
@@ -3594,10 +3810,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
- case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
@@ -3621,10 +3837,12 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
- case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
}
return SDValue();
}
@@ -4002,78 +4220,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
-
- case ARM::tANDsp:
- case ARM::tADDspr_:
- case ARM::tSUBspi_:
- case ARM::t2SUBrSPi_:
- case ARM::t2SUBrSPi12_:
- case ARM::t2SUBrSPs_: {
- MachineFunction *MF = BB->getParent();
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool DstIsDead = MI->getOperand(0).isDead();
- bool SrcIsKill = MI->getOperand(1).isKill();
-
- if (SrcReg != ARM::SP) {
- // Copy the source to SP from virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
- }
-
- unsigned OpOpc = 0;
- bool NeedPred = false, NeedCC = false, NeedOp3 = false;
- switch (MI->getOpcode()) {
- default:
- llvm_unreachable("Unexpected pseudo instruction!");
- case ARM::tANDsp:
- OpOpc = ARM::tAND;
- NeedPred = true;
- break;
- case ARM::tADDspr_:
- OpOpc = ARM::tADDspr;
- break;
- case ARM::tSUBspi_:
- OpOpc = ARM::tSUBspi;
- break;
- case ARM::t2SUBrSPi_:
- OpOpc = ARM::t2SUBrSPi;
- NeedPred = true; NeedCC = true;
- break;
- case ARM::t2SUBrSPi12_:
- OpOpc = ARM::t2SUBrSPi12;
- NeedPred = true;
- break;
- case ARM::t2SUBrSPs_:
- OpOpc = ARM::t2SUBrSPs;
- NeedPred = true; NeedCC = true; NeedOp3 = true;
- break;
- }
- MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP);
- if (OpOpc == ARM::tAND)
- AddDefaultT1CC(MIB);
- MIB.addReg(ARM::SP);
- MIB.addOperand(MI->getOperand(2));
- if (NeedOp3)
- MIB.addOperand(MI->getOperand(3));
- if (NeedPred)
- AddDefaultPred(MIB);
- if (NeedCC)
- AddDefaultCC(MIB);
-
- // Copy the result from SP to virtual register.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
- unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
- ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
- BuildMI(*BB, MI, dl, TII->get(CopyOpc))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(ARM::SP);
- MI->eraseFromParent(); // The pseudo instruction is gone now.
- return BB;
- }
}
}
@@ -4141,30 +4287,42 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
return SDValue();
}
-/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
-static SDValue PerformADDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- // added by evan in r37685 with no testcase.
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
-
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1. This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI) {
// fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
if (Result.getNode()) return Result;
}
- if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
- SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
- if (Result.getNode()) return Result;
- }
-
return SDValue();
}
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // First try with the default operand order.
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+ if (Result.getNode())
+ return Result;
+
+ // If that didn't work, try again with the operands commuted.
+ return PerformADDCombineWithOperands(N, N1, N0, DCI);
+}
+
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
static SDValue PerformSUBCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
- // added by evan in r37685 with no testcase.
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
// fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
@@ -4231,6 +4389,105 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+ // reasonable.
+
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ // 1) or (and A, mask), val => ARMbfi A, val, mask
+ // iff (val & ~mask) == val
+ //
+ // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+ // && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+ // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+ // && CountPopulation_32(~mask) == CountPopulation_32(mask2)
+ // (i.e., copy a bitfield value into another bitfield of the same width)
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C)
+ return SDValue();
+ unsigned Mask = C->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ SDValue Res;
+ // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+ if ((C = dyn_cast<ConstantSDNode>(N1))) {
+ unsigned Val = C->getZExtValue();
+ if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
+ return SDValue();
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ } else if (N1.getOpcode() == ISD::AND) {
+ // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+ C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!C)
+ return SDValue();
+ unsigned Mask2 = C->getZExtValue();
+
+ if (ARM::isBitFieldInvertedMask(Mask) &&
+ ARM::isBitFieldInvertedMask(~Mask2) &&
+ (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask == 0xffff || Mask == 0xffff0000))
+ return SDValue();
+ // 2a
+ unsigned lsb = CountTrailingZeros_32(Mask2);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
+ DAG.getConstant(Mask, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+ ARM::isBitFieldInvertedMask(Mask2) &&
+ (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
+ // The pack halfword instruction works better for masks that fit it,
+ // so use that when it's available.
+ if (Subtarget->hasT2ExtractPack() &&
+ (Mask2 == 0xffff || Mask2 == 0xffff0000))
+ return SDValue();
+ // 2b
+ unsigned lsb = CountTrailingZeros_32(Mask);
+ Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(lsb, MVT::i32));
+ Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+ DAG.getConstant(Mask2, MVT::i32));
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ }
+ }
+
+ return SDValue();
+}
+
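A scalar sanity check for pattern (1), under the conditions tested above (a sketch; the constants are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  // or (and A, mask), val  ==>  ARMbfi A, val, mask
  // requires an inverted mask and (val & ~mask) == val, i.e. val lies
  // entirely within the field that the AND clears.
  uint32_t A    = 0xAABBCCDDu;
  uint32_t Mask = 0xFF00FFFFu;      // inverted mask: zeros in bits 16..23
  uint32_t Val  = 0x00420000u;      // confined to the cleared field
  assert((Val & ~Mask) == Val);
  uint32_t Res = (A & Mask) | Val;  // what a single BFI computes
  assert(Res == 0xAA42CCDDu);
  return 0;
}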
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4561,7 +4818,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
// If the target supports NEON, try to use vmax/vmin instructions for f32
- // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
+ // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set,
// be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
// a NaN; only do the transformation when it matches that behavior.
@@ -4648,6 +4905,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
@@ -5379,6 +5637,21 @@ int ARM::getVFPf64Imm(const APFloat &FPImm) {
return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return false;
+ // There can be 1's on either or both "outsides"; all the "inside"
+ // bits must be 0's.
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1u << msb)) --msb;
+ while (v & (1u << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1u << i))
+ return false;
+ }
+ return true;
+}
+
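A standalone rendering of the predicate with a few sanity tests (a sketch that matches the logic above):

#include <cassert>
#include <cstdint>

// True iff v has ones only on the "outsides" and one contiguous run of
// zeros inside, e.g. 0xff0000ff or 0xfffffff0.
static bool isBitFieldInvertedMask(uint32_t v) {
  if (v == 0xffffffffu)
    return false;
  unsigned lsb = 0, msb = 31;
  while (v & (1u << msb)) --msb;  // skip leading ones
  while (v & (1u << lsb)) ++lsb;  // skip trailing ones
  for (unsigned i = lsb; i <= msb; ++i)
    if (v & (1u << i))            // a stray one inside disqualifies v
      return false;
  return true;
}

int main() {
  assert(isBitFieldInvertedMask(0xff0000ffu));   // zeros in bits 8..23
  assert(isBitFieldInvertedMask(0xfffffff0u));   // zeros in bits 0..3
  assert(!isBitFieldInvertedMask(0xff00ff00u));  // two separate zero runs
  assert(!isBitFieldInvertedMask(0xffffffffu));  // no zero field at all
  return 0;
}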
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 128b72e1e743e..ba9ea7f15e7b2 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -17,6 +17,8 @@
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include <vector>
@@ -45,6 +47,8 @@ namespace llvm {
PIC_ADD, // Add with a PC operand and a PIC label.
+ AND, // ARM "and" instruction that sets the 's' flag in CPSR.
+
CMP, // ARM compare instructions.
CMPZ, // ARM compare that sets only Z flag.
CMPFP, // ARM VFP compare instruction, sets FPSCR.
@@ -80,7 +84,7 @@ namespace llvm {
MEMBARRIER, // Memory barrier
SYNCBARRIER, // Memory sync barrier
-
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -141,6 +145,10 @@ namespace llvm {
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ // Vector multiply long:
+ VMULLs, // ...signed
+ VMULLu, // ...unsigned
+
// Operands of the standard BUILD_VECTOR node are not legalized, which
// is fine if BUILD_VECTORs are always lowered to shuffles or other
// operations, but for ARM some BUILD_VECTORs are legal as-is and their
@@ -150,7 +158,10 @@ namespace llvm {
// Floating-point max and min:
FMAX,
- FMIN
+ FMIN,
+
+ // Bit-field insert
+ BFI
};
}
@@ -162,6 +173,7 @@ namespace llvm {
/// returns -1.
int getVFPf32Imm(const APFloat &FPImm);
int getVFPf64Imm(const APFloat &FPImm);
+ bool isBitFieldInvertedMask(unsigned v);
}
//===--------------------------------------------------------------------===//
@@ -171,6 +183,8 @@ namespace llvm {
public:
explicit ARMTargetLowering(TargetMachine &TM);
+ virtual unsigned getJumpTableEncoding(void) const;
+
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
/// ReplaceNodeResults - Replace the results of node with an illegal result
@@ -255,8 +269,19 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ virtual unsigned getMaximalGlobalOffset() const;
+
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
Sched::Preference getSchedulingPreference(SDNode *N) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -265,11 +290,17 @@ namespace llvm {
/// materialize the FP immediate as a load from a constant pool.
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
private:
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
+ const TargetRegisterInfo *RegInfo;
+
/// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;
@@ -310,14 +341,15 @@ namespace llvm {
SelectionDAG &DAG) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -377,6 +409,10 @@ namespace llvm {
unsigned BinOpcode) const;
};
+
+ namespace ARM {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+ }
}
#endif // ARMISELLOWERING_H
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index ac568e75ccc46..113cfffe61f94 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -36,37 +36,38 @@ def LdStMulFrm : Format<10>;
def LdStExFrm : Format<11>;
def ArithMiscFrm : Format<12>;
-def ExtFrm : Format<13>;
-
-def VFPUnaryFrm : Format<14>;
-def VFPBinaryFrm : Format<15>;
-def VFPConv1Frm : Format<16>;
-def VFPConv2Frm : Format<17>;
-def VFPConv3Frm : Format<18>;
-def VFPConv4Frm : Format<19>;
-def VFPConv5Frm : Format<20>;
-def VFPLdStFrm : Format<21>;
-def VFPLdStMulFrm : Format<22>;
-def VFPMiscFrm : Format<23>;
-
-def ThumbFrm : Format<24>;
-def MiscFrm : Format<25>;
-
-def NGetLnFrm : Format<26>;
-def NSetLnFrm : Format<27>;
-def NDupFrm : Format<28>;
-def NLdStFrm : Format<29>;
-def N1RegModImmFrm: Format<30>;
-def N2RegFrm : Format<31>;
-def NVCVTFrm : Format<32>;
-def NVDupLnFrm : Format<33>;
-def N2RegVShLFrm : Format<34>;
-def N2RegVShRFrm : Format<35>;
-def N3RegFrm : Format<36>;
-def N3RegVShFrm : Format<37>;
-def NVExtFrm : Format<38>;
-def NVMulSLFrm : Format<39>;
-def NVTBLFrm : Format<40>;
+def SatFrm : Format<13>;
+def ExtFrm : Format<14>;
+
+def VFPUnaryFrm : Format<15>;
+def VFPBinaryFrm : Format<16>;
+def VFPConv1Frm : Format<17>;
+def VFPConv2Frm : Format<18>;
+def VFPConv3Frm : Format<19>;
+def VFPConv4Frm : Format<20>;
+def VFPConv5Frm : Format<21>;
+def VFPLdStFrm : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm : Format<24>;
+
+def ThumbFrm : Format<25>;
+def MiscFrm : Format<26>;
+
+def NGetLnFrm : Format<27>;
+def NSetLnFrm : Format<28>;
+def NDupFrm : Format<29>;
+def NLdStFrm : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm : Format<32>;
+def NVCVTFrm : Format<33>;
+def NVDupLnFrm : Format<34>;
+def N2RegVShLFrm : Format<35>;
+def N2RegVShRFrm : Format<36>;
+def N3RegFrm : Format<37>;
+def N3RegVShFrm : Format<38>;
+def NVExtFrm : Format<39>;
+def NVMulSLFrm : Format<40>;
+def NVTBLFrm : Format<41>;
// Misc flags.
@@ -87,21 +88,21 @@ class Xform16Bit { bit canXformTo16Bit = 1; }
class AddrMode<bits<4> val> {
bits<4> Value = val;
}
-def AddrModeNone : AddrMode<0>;
-def AddrMode1 : AddrMode<1>;
-def AddrMode2 : AddrMode<2>;
-def AddrMode3 : AddrMode<3>;
-def AddrMode4 : AddrMode<4>;
-def AddrMode5 : AddrMode<5>;
-def AddrMode6 : AddrMode<6>;
-def AddrModeT1_1 : AddrMode<7>;
-def AddrModeT1_2 : AddrMode<8>;
-def AddrModeT1_4 : AddrMode<9>;
-def AddrModeT1_s : AddrMode<10>;
-def AddrModeT2_i12: AddrMode<11>;
-def AddrModeT2_i8 : AddrMode<12>;
-def AddrModeT2_so : AddrMode<13>;
-def AddrModeT2_pc : AddrMode<14>;
+def AddrModeNone : AddrMode<0>;
+def AddrMode1 : AddrMode<1>;
+def AddrMode2 : AddrMode<2>;
+def AddrMode3 : AddrMode<3>;
+def AddrMode4 : AddrMode<4>;
+def AddrMode5 : AddrMode<5>;
+def AddrMode6 : AddrMode<6>;
+def AddrModeT1_1 : AddrMode<7>;
+def AddrModeT1_2 : AddrMode<8>;
+def AddrModeT1_4 : AddrMode<9>;
+def AddrModeT1_s : AddrMode<10>;
+def AddrModeT2_i12 : AddrMode<11>;
+def AddrModeT2_i8 : AddrMode<12>;
+def AddrModeT2_so : AddrMode<13>;
+def AddrModeT2_pc : AddrMode<14>;
def AddrModeT2_i8s4 : AddrMode<15>;
// Instruction size.
@@ -137,11 +138,17 @@ def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains
// ARM special operands.
//
+def CondCodeOperand : AsmOperandClass {
+ let Name = "CondCode";
+ let SuperClasses = [];
+}
+
// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
// register whose default is 0 (no register).
def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
(ops (i32 14), (i32 zero_reg))> {
let PrintMethod = "printPredicateOperand";
+ let ParserMatchClass = CondCodeOperand;
}
// Conditional code result for instructions whose 's' bit is set, e.g. subs.
@@ -240,6 +247,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
+
// A few are not predicable
class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
@@ -254,9 +262,9 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsARM];
}
-// Same as I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as I except it can optionally modify CPSR. Note it's modeled as an input
+// operand since by default it's a zero register. It will become an implicit def
+// once it's "flipped".
class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
@@ -313,7 +321,7 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
}
class ABXIx2<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, BrMiscFrm, itin,
+ : XI<oops, iops, AddrModeNone, Size8Bytes, IndexModeNone, Pseudo, itin,
asm, "", pattern>;
// BR_JT instructions
@@ -322,16 +330,14 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
-
// Atomic load/store instructions
-
class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 1;
+ let Inst{20} = 1;
let Inst{11-0} = 0b111110011111;
}
class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -340,7 +346,7 @@ class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
opc, asm, "", pattern> {
let Inst{27-23} = 0b00011;
let Inst{22-21} = opcod;
- let Inst{20} = 0;
+ let Inst{20} = 0;
let Inst{11-4} = 0b11111001;
}
@@ -350,21 +356,21 @@ class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
: I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{24-21} = opcod;
- let Inst{27-26} = {0,0};
+ let Inst{27-26} = 0b00;
}
class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -377,7 +383,7 @@ class AI2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern> {
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// loads
@@ -389,7 +395,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -399,7 +405,7 @@ class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -409,7 +415,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -419,7 +425,7 @@ class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// stores
@@ -431,7 +437,7 @@ class AI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -441,7 +447,7 @@ class AXI2stw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -451,7 +457,7 @@ class AI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
@@ -461,7 +467,7 @@ class AXI2stb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed loads
@@ -473,7 +479,7 @@ class AI2ldwpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -483,7 +489,7 @@ class AI2ldbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Pre-indexed stores
@@ -495,7 +501,7 @@ class AI2stwpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -505,7 +511,7 @@ class AI2stbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 1; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 1; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed loads
@@ -517,7 +523,7 @@ class AI2ldwpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -527,7 +533,7 @@ class AI2ldbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// Post-indexed stores
@@ -539,7 +545,7 @@ class AI2stwpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
@@ -549,7 +555,7 @@ class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{21} = 0; // W bit
let Inst{22} = 1; // B bit
let Inst{24} = 0; // P bit
- let Inst{27-26} = {0,1};
+ let Inst{27-26} = 0b01;
}
// addrmode3 instructions
@@ -977,7 +983,7 @@ class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
Encoding {
let Inst{31-27} = opcod1;
let Inst{15-14} = opcod2;
- let Inst{12} = opcod3;
+ let Inst{12} = opcod3;
}
// BR_JT instructions
@@ -1099,13 +1105,13 @@ class T1Special<bits<4> opcode> : Encoding16 {
// A6.2.4 Load/store single data item encoding.
class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
let Inst{15-12} = opA;
- let Inst{11-9} = opB;
+ let Inst{11-9} = opB;
}
-class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
+class T1LdSt<bits<3> opB> : T1LoadStore<0b0101, opB>;
class T1LdSt4Imm<bits<3> opB> : T1LoadStore<0b0110, opB>; // Immediate, 4 bytes
class T1LdSt1Imm<bits<3> opB> : T1LoadStore<0b0111, opB>; // Immediate, 1 byte
class T1LdSt2Imm<bits<3> opB> : T1LoadStore<0b1000, opB>; // Immediate, 2 bytes
-class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
// A6.2.5 Miscellaneous 16-bit instructions encoding.
class T1Misc<bits<7> opcode> : Encoding16 {
@@ -1125,9 +1131,10 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsThumb2];
}
-// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as
-// an input operand since by default it's a zero register. It will
-// become an implicit def once it's "flipped".
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
// more consistent.
class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1185,11 +1192,11 @@ class T2Ii8s4<bit P, bit W, bit load, dag oops, dag iops, InstrItinClass itin,
pattern> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
- let Inst{24} = P;
- let Inst{23} = ?; // The U bit.
- let Inst{22} = 1;
- let Inst{21} = W;
- let Inst{20} = load;
+ let Inst{24} = P;
+ let Inst{23} = ?; // The U bit.
+ let Inst{22} = 1;
+ let Inst{21} = W;
+ let Inst{20} = load;
}
class T2sI<dag oops, dag iops, InstrItinClass itin,
@@ -1225,14 +1232,14 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
list<Predicate> Predicates = [IsThumb2];
let Inst{31-27} = 0b11111;
let Inst{26-25} = 0b00;
- let Inst{24} = signed;
- let Inst{23} = 0;
+ let Inst{24} = signed;
+ let Inst{23} = 0;
let Inst{22-21} = opcod;
- let Inst{20} = load;
- let Inst{11} = 1;
+ let Inst{20} = load;
+ let Inst{11} = 1;
// (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
- let Inst{10} = pre; // The P bit.
- let Inst{8} = 1; // The W bit.
+ let Inst{10} = pre; // The P bit.
+ let Inst{8} = 1; // The W bit.
}
// Helper class for disassembly only
@@ -1243,9 +1250,9 @@ class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops,
: T2I<oops, iops, itin, opc, asm, pattern> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b011;
- let Inst{23} = long;
+ let Inst{23} = long;
let Inst{22-20} = op22_20;
- let Inst{7-4} = op7_4;
+ let Inst{7-4} = op7_4;
}
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
@@ -1325,9 +1332,9 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
}
// Load / store multiple
-class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
@@ -1337,9 +1344,9 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
let D = VFPNeonDomain;
}
-class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
+class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
@@ -1367,8 +1374,8 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Double precision, binary, VML[AS] (for additional predicate)
@@ -1379,12 +1386,11 @@ class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1011;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
list<Predicate> Predicates = [HasVFP2, UseVMLx];
}
-
// Single precision, unary
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
@@ -1415,8 +1421,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
let Inst{11-8} = 0b1010;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Single precision binary, if no NEON
@@ -1521,10 +1527,18 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
: NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm,
cstr, pattern> {
let Inst{31-24} = 0b11110100;
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{7-4} = op7_4;
+ let Inst{11-8} = op11_8;
+ let Inst{7-4} = op7_4;
+}
+
+class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
+ : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ itin> {
+ let OutOperandList = oops;
+ let InOperandList = !con(iops, (ins pred:$p));
+ list<Predicate> Predicates = [HasNEON];
}
class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -1548,13 +1562,13 @@ class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
string opc, string dt, string asm, string cstr,
list<dag> pattern>
: NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
- let Inst{23} = op23;
+ let Inst{23} = op23;
let Inst{21-19} = op21_19;
- let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{5} = op5;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{5} = op5;
+ let Inst{4} = op4;
}
// NEON 2 vector register format.
@@ -1567,9 +1581,9 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Same as N2V except it doesn't have a datatype suffix.
@@ -1582,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
let Inst{17-16} = op17_16;
- let Inst{11-7} = op11_7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-7} = op11_7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON 2 vector register with immediate.
@@ -1592,12 +1606,12 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{11-8} = op11_8;
- let Inst{7} = op7;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{7} = op7;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON 3 vector register format.
@@ -1605,12 +1619,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
: NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// Same as N3V except it doesn't have a data type suffix.
@@ -1619,12 +1633,12 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
- let Inst{24} = op24;
- let Inst{23} = op23;
+ let Inst{24} = op24;
+ let Inst{23} = op23;
let Inst{21-20} = op21_20;
- let Inst{11-8} = op11_8;
- let Inst{6} = op6;
- let Inst{4} = op4;
+ let Inst{11-8} = op11_8;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
}
// NEON VMOVs between scalar and core registers.
@@ -1634,9 +1648,9 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
: InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
"", itin> {
let Inst{27-20} = opcod1;
- let Inst{11-8} = opcod2;
- let Inst{6-5} = opcod3;
- let Inst{4} = 1;
+ let Inst{11-8} = opcod2;
+ let Inst{6-5} = opcod3;
+ let Inst{4} = 1;
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
@@ -1670,9 +1684,9 @@ class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
let Inst{24-23} = 0b11;
let Inst{21-20} = 0b11;
let Inst{19-16} = op19_16;
- let Inst{11-7} = 0b11000;
- let Inst{6} = op6;
- let Inst{4} = 0;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
}
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 51fc1522485fa..e66f9b9ad0ac5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -44,6 +44,10 @@ def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
SDTCisVT<3, i32>, SDTCisVT<4, i32>,
SDTCisVT<5, OtherVT>]>;
+def SDT_ARMAnd : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -54,13 +58,16 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
-def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
-def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
-def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMMEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMSYNCBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_ARMMEMBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMSYNCBARRIERMCR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -99,11 +106,14 @@ def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
[SDNPHasChain]>;
+def ARMand : SDNode<"ARMISD::AND", SDT_ARMAnd,
+ [SDNPOutFlag]>;
+
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutFlag]>;
def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp,
- [SDNPOutFlag,SDNPCommutative]>;
+ [SDNPOutFlag, SDNPCommutative]>;
def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
@@ -117,51 +127,54 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
-def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7,
- [SDNPHasChain]>;
-def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
-def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
- [SDNPHasChain]>;
+def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMSyncBarrier : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIER,
+ [SDNPHasChain]>;
+def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
+def ARMSyncBarrierMCR : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERMCR,
+ [SDNPHasChain]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
-def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
-def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
-def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
-def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
-def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
-def HasNEON : Predicate<"Subtarget->hasNEON()">;
-def HasDivide : Predicate<"Subtarget->hasDivide()">;
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
-def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">;
-def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM : Predicate<"!Subtarget->isThumb()">;
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
// FIXME: Eventually this will be just "hasV6T2Ops".
-def UseMovt : Predicate<"Subtarget->useMovt()">;
-def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
-
-def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseVMLx : Predicate<"Subtarget->useVMLx()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -221,29 +234,12 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
/// e.g., 0xf000ffff
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
- uint32_t v = (uint32_t)N->getZExtValue();
- if (v == 0xffffffff)
- return 0;
- // there can be 1's on either or both "outsides", all the "inside"
- // bits must be 0's
- unsigned int lsb = 0, msb = 31;
- while (v & (1 << msb)) --msb;
- while (v & (1 << lsb)) ++lsb;
- for (unsigned int i = lsb; i <= msb; ++i) {
- if (v & (1 << i))
- return 0;
- }
- return 1;
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
}] > {
let PrintMethod = "printBitfieldInvMaskImmOperand";
}
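// Editor's note: a standalone sketch of the predicate that the removed
// inline code above implemented (now delegated to
// ARM::isBitFieldInvertedMask). It accepts masks such as 0xf000ffff: ones on
// either or both "outsides" with a single contiguous run of zeros in between.
// Hypothetical helper name; not the actual LLVM implementation.
#include <cstdint>

static bool isBitFieldInvertedMaskSketch(uint32_t v) {
  if (v == 0xffffffff)
    return false;                    // no zero field to insert into
  unsigned lsb = 0, msb = 31;
  while (v & (1u << msb)) --msb;     // skip the high run of ones
  while (v & (1u << lsb)) ++lsb;     // skip the low run of ones
  for (unsigned i = lsb; i <= msb; ++i)
    if (v & (1u << i))
      return false;                  // a stray one inside the zero field
  return true;
}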
/// Split a 32-bit immediate into two 16 bit parts.
-def lo16 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() & 0xffff,
- MVT::i32);
-}]>;
-
def hi16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
}]>;
@@ -306,6 +302,13 @@ def pclabel : Operand<i32> {
let PrintMethod = "printPCLabel";
}
+// shift_imm: An integer that encodes a shift amount and the type of shift
+// (currently either asr or lsl) using the same encoding used for the
+// immediates in so_reg operands.
+def shift_imm : Operand<i32> {
+ let PrintMethod = "printShiftImmOperand";
+}
+
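// Editor's note: a sketch of the shift_imm encoding described above, under
// the assumption (consistent with the lsl_shift_imm/asr_shift_imm xforms
// later in this patch, which call ARM_AM::getSORegOpc) that the shift kind
// lives in the low bits and the amount above it. The exact field layout is
// illustrative, not the authoritative ARM_AM definition.
#include <cstdint>

enum ShiftKindSketch { LSL_SK = 0, ASR_SK = 1 };   // hypothetical tags

static unsigned encodeShiftImmSketch(ShiftKindSketch k, unsigned amt) {
  return unsigned(k) | (amt << 3);   // assumed packing: kind | (amount << 3)
}
static unsigned shiftAmtSketch(unsigned enc)  { return enc >> 3; }
static ShiftKindSketch shiftKindSketch(unsigned enc) {
  return ShiftKindSketch(enc & 7);
}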
// shifter_operand operands: so_reg and so_imm.
def so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShifterOperandReg",
@@ -319,10 +322,7 @@ def so_reg : Operand<i32>, // reg reg imm
// represented in the imm field in the same 12-bit form that they are encoded
// into so_imm instructions: the 8-bit immediate is the least significant bits
// [bits 0-7], the 4-bit shift amount is the next 4 bits [bits 8-11].
-def so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getSOImmVal(N->getZExtValue()) != -1;
- }]> {
+def so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> {
let PrintMethod = "printSOImmOperand";
}
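// Editor's note: a sketch of decoding the 12-bit so_imm form described
// above: bits [7:0] hold the 8-bit immediate and bits [11:8] the rotation
// field, with the value being the immediate rotated right by twice the
// rotation field (the standard ARM modified-immediate rule).
#include <cstdint>

static uint32_t decodeSOImmSketch(unsigned enc12) {
  uint32_t imm8 = enc12 & 0xff;
  unsigned rot = ((enc12 >> 8) & 0xf) * 2;   // rotate-right amount, 0..30
  if (rot == 0)
    return imm8;
  return (imm8 >> rot) | (imm8 << (32 - rot));
}
// e.g. decodeSOImmSketch(0x4FF) == 0xFF000000 (0xFF rotated right by 8).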
@@ -452,11 +452,15 @@ include "ARMInstrFormats.td"
/// binop that produces a value.
multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
let Inst{25} = 1;
}
+ }
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
@@ -502,7 +506,7 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to AsI1_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm, IIC_iCMPi,
@@ -1117,7 +1121,7 @@ let isBranch = 1, isTerminator = 1 in {
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target \n$jt",
+ IIC_Br, "mov\tpc, $target$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
let Inst{11-4} = 0b00000000;
let Inst{15-12} = 0b1111;
@@ -1127,7 +1131,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTm : JTI<(outs),
(ins addrmode2:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "ldr\tpc, $target \n$jt",
+ IIC_Br, "ldr\tpc, $target$jt",
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
imm:$id)]> {
let Inst{15-12} = 0b1111;
@@ -1139,7 +1143,7 @@ let isBranch = 1, isTerminator = 1 in {
}
def BR_JTadd : JTI<(outs),
(ins GPR:$target, GPR:$idx, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "add\tpc, $target, $idx \n$jt",
+ IIC_Br, "add\tpc, $target, $idx$jt",
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]> {
let Inst{15-12} = 0b1111;
@@ -1573,8 +1577,12 @@ defm UXTH : AI_unary_rrot<0b01101111,
defm UXTB16 : AI_unary_rrot<0b01101100,
"uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
-def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (UXTB16r_rot GPR:$Src, 24)>;
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead so we can include a check for masking back in the upper
+// eight bits of the source into the lower eight bits of the result.
+//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
+// (UXTB16r_rot GPR:$Src, 24)>;
def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
(UXTB16r_rot GPR:$Src, 8)>;
@@ -1631,16 +1639,24 @@ defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>;
-// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
+ IIC_iALUi, "rsb", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sub so_imm:$b, GPR:$a))]> {
let Inst{25} = 1;
}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSBrr : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
+ IIC_iALUr, "rsb", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{25} = 0;
+ let Inst{11-4} = 0b00000000;
+}
+
def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "\t$dst, $a, $b",
- [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
+ IIC_iALUsr, "rsb", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
let Inst{25} = 0;
}
@@ -1667,6 +1683,14 @@ def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
Requires<[IsARM]> {
let Inst{25} = 1;
}
+// The reg/reg form is only defined for the disassembler; for codegen it is
+// equivalent to SUBrr.
+def RSCrr : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ DPFrm, IIC_iALUr, "rsc", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{25} = 0;
+ let Inst{11-4} = 0b00000000;
+}
def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
[(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
@@ -1716,24 +1740,26 @@ def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
// ARM Arithmetic Instruction -- for disassembly only
// GPR:$dst = GPR:$a op GPR:$b
-class AAI<bits<8> op27_20, bits<4> op7_4, string opc>
+class AAI<bits<8> op27_20, bits<4> op7_4, string opc,
+ list<dag> pattern = [/* For disassembly only; pattern left blank */]>
: AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
- opc, "\t$dst, $a, $b",
- [/* For disassembly only; pattern left blank */]> {
+ opc, "\t$dst, $a, $b", pattern> {
let Inst{27-20} = op27_20;
let Inst{7-4} = op7_4;
}
// Saturating add/subtract -- for disassembly only
-def QADD : AAI<0b00010000, 0b0101, "qadd">;
+def QADD : AAI<0b00010000, 0b0101, "qadd",
+ [(set GPR:$dst, (int_arm_qadd GPR:$a, GPR:$b))]>;
def QADD16 : AAI<0b01100010, 0b0001, "qadd16">;
def QADD8 : AAI<0b01100010, 0b1001, "qadd8">;
def QASX : AAI<0b01100010, 0b0011, "qasx">;
def QDADD : AAI<0b00010100, 0b0101, "qdadd">;
def QDSUB : AAI<0b00010110, 0b0101, "qdsub">;
def QSAX : AAI<0b01100010, 0b0101, "qsax">;
-def QSUB : AAI<0b00010010, 0b0101, "qsub">;
+def QSUB : AAI<0b00010010, 0b0101, "qsub",
+ [(set GPR:$dst, (int_arm_qsub GPR:$a, GPR:$b))]>;
def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">;
def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">;
def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
@@ -1793,54 +1819,45 @@ def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
// Signed/Unsigned saturate -- for disassembly only
-def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b001;
-}
-
-def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def SSAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{27-21} = 0b0110101;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
}
-def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{27-20} = 0b01101010;
let Inst{7-4} = 0b0011;
}
-def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b001;
-}
-
-def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
- DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def USAT : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, shift_imm:$sh),
+ SatFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{27-21} = 0b0110111;
- let Inst{6-4} = 0b101;
+ let Inst{5-4} = 0b01;
}
-def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), SatFrm,
NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{27-20} = 0b01101110;
let Inst{7-4} = 0b0011;
}
+def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>;
+def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
+
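// Editor's note: a sketch of the saturation that the int_arm_ssat /
// int_arm_usat patterns above rely on. SSAT clamps to a signed pos-bit
// range and USAT to an unsigned pos-bit range; this mirrors the
// architectural definition rather than any LLVM helper.
#include <cstdint>

static int32_t ssatSketch(int64_t x, unsigned pos) {   // pos in [1,32]
  int64_t maxv = (int64_t(1) << (pos - 1)) - 1;
  int64_t minv = -(int64_t(1) << (pos - 1));
  return int32_t(x > maxv ? maxv : (x < minv ? minv : x));
}
static uint32_t usatSketch(int64_t x, unsigned pos) {  // pos in [0,31]
  int64_t maxv = (int64_t(1) << pos) - 1;
  return uint32_t(x > maxv ? maxv : (x < 0 ? 0 : x));
}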
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
//
defm AND : AsI1_bin_irs<0b0000, "and",
BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm ANDS : AI1_bin_s_irs<0b0000, "and",
+ BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
@@ -1858,11 +1875,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
}
// A8.6.18 BFI - Bitfield insert (Encoding A1)
-// Added for disassembler with the pattern field purposely left blank.
-def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$dst, $src, $imm", "",
- [/* For disassembly only; pattern left blank */]>,
+ "bfi", "\t$dst, $val, $imm", "$src = $dst",
+ [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+ bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-21} = 0b0111110;
let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
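// Editor's note: a sketch of the bitfield-insert semantics captured by the
// ARMbfi pattern above, using the inverted-mask convention of
// bf_inv_mask_imm (zeros mark the field being written, e.g. 0xf000ffff).
#include <cstdint>

// Precondition: invMask contains at least one zero bit (bf_inv_mask_imm
// rejects 0xffffffff, so this holds for any accepted operand).
static uint32_t bfiSketch(uint32_t src, uint32_t val, uint32_t invMask) {
  unsigned lsb = 0;
  while (invMask & (1u << lsb)) ++lsb;          // first zero bit = field LSB
  return (src & invMask) | ((val << lsb) & ~invMask);
}
// bfiSketch(src, val, 0xf000ffff) inserts val's low 12 bits at bit 16.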
@@ -2232,11 +2249,20 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
let Inst{19-16} = 0b1111;
}
+def lsl_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def lsl_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() < 32);
+}], lsl_shift_imm>;
+
def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
+ (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, (i32 imm:$shamt)),
+ (and (shl GPR:$src2, lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[IsARM, HasV6]> {
let Inst{6-4} = 0b001;
@@ -2245,26 +2271,37 @@ def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
// Alternate cases for PKHBT where identities eliminate some nodes.
def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
(PKHBT GPR:$src1, GPR:$src2, 0)>;
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$sh)),
+ (PKHBT GPR:$src1, GPR:$src2, (lsl_shift_imm imm16_31:$sh))>;
+
+def asr_shift_imm : SDNodeXForm<imm, [{
+ unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
+ return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+def asr_amt : PatLeaf<(i32 imm), [{
+ return (N->getZExtValue() <= 32);
+}], asr_shift_imm>;
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
- (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
+ (ins GPR:$src1, GPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>, Requires<[IsARM, HasV6]> {
+ (and (sra GPR:$src2, asr_amt:$sh),
+ 0xFFFF)))]>,
+ Requires<[IsARM, HasV6]> {
let Inst{6-4} = 0b101;
}
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here; use PKHBT instead.
-def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (PKHTB GPR:$src1, GPR:$src2, 16)>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+ (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
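// Editor's note: a sketch of what the PKHBT/PKHTB selection patterns above
// compute, written out as plain C++ (architectural semantics, not an LLVM
// API).
#include <cstdint>

// PKHBT: bottom half from src1, top half from (src2 << sh).
static uint32_t pkhbtSketch(uint32_t src1, uint32_t src2, unsigned sh) {
  return (src1 & 0x0000ffffu) | ((src2 << sh) & 0xffff0000u);
}
// PKHTB: top half from src1, bottom half from (src2 >> sh), where the
// shift is arithmetic (asr), matching the sra node in the pattern.
static uint32_t pkhtbSketch(uint32_t src1, int32_t src2, unsigned sh) {
  return (src1 & 0xffff0000u) | (uint32_t(src2 >> sh) & 0x0000ffffu);
}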
//===----------------------------------------------------------------------===//
// Comparison Instructions...
@@ -2272,8 +2309,52 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
defm CMP : AI1_cmp_irs<0b1010, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
-//FIXME: Disable CMN, as CCodes are backwards from compare expectations
-// Compare-to-zero still works out, just not the relationals
+
+// FIXME: There seems to be a (potential) hardware bug with the CMN instruction
+// and comparison with 0. These two pieces of code should give identical
+// results:
+//
+// rsbs r1, r1, 0
+// cmp r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// and:
+//
+// cmn r0, r1
+// mov r0, #0
+// it ls
+// mov r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. In short,
+// for CMP the AddWithCarry computes r0 + NOT(0) + 1 (the "carry bit"
+// parameter to AddWithCarry is defined as 1 in this case); that sum never
+// fits in 32 bits, so the carry flag is always set. The CMN instruction
+// doesn't perform a NOT of 0 and its "carry bit" parameter is defined as 0,
+// so there is never a carry out of its AddWithCarry.
+//
+// The AddWithCarry in the CMP case seems to be relying upon the identity:
+//
+// ~x + 1 = -x
+//
+// However when x is 0 and unsigned, this doesn't hold:
+//
+// x = 0
+// ~x = 0xFFFF FFFF
+// ~x + 1 = 0x1 0000 0000
+// (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable *all* versions of CMN, especially when comparing
+// against zero, until we can limit when the CMN instruction is used (when we
+// know that the RHS is not 0) or when we have a hardware fix for this.
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to <rdar://problem/7569620>.
+//
//defm CMN : AI1_cmp_irs<0b1011, "cmn",
// BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
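// Editor's note: a runnable illustration of the ~x + 1 == -x identity
// breaking at x == 0 in 32-bit arithmetic, exactly as the FIXME above
// describes for CMP (r0 + NOT(r1) + 1) versus CMN (r0 + r1 + 0).
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t r0 = 5, r1 = 0;
  uint64_t cmp = uint64_t(r0) + uint64_t(~r1) + 1;  // 5 + 0xFFFFFFFF + 1
  uint64_t cmn = uint64_t(r0) + uint64_t(r1) + 0;   // 5 + 0 + 0
  std::printf("CMP carry out = %d\n", int(cmp >> 32));  // 1: always set
  std::printf("CMN carry out = %d\n", int(cmn >> 32));  // 0: never set
  return 0;
}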
@@ -2298,8 +2379,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
Defs = [CPSR] in {
def BCCi64 : PseudoInst<(outs),
- (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
- IIC_Br,
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
"${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
[(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
@@ -2346,102 +2427,63 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def Int_MemBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DMBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff05;
// FIXME: add support for options other than a full system DMB
// See DMB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_SyncBarrierV7 : AInoP<(outs), (ins),
- Pseudo, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsARM, HasV7]> {
+def DSBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsARM, HasDB]> {
let Inst{31-4} = 0xf57ff04;
// FIXME: add support for options other than a full system DSB
// See DSB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
-def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 5",
- [(ARMMemBarrierV6 GPR:$zero)]>,
+ [(ARMMemBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DMB
// FIXME: add encoding
}
-def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
- Pseudo, NoItinerary,
+def DSB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
"mcr", "\tp15, 0, $zero, c7, c10, 4",
- [(ARMSyncBarrierV6 GPR:$zero)]>,
+ [(ARMSyncBarrierMCR GPR:$zero)]>,
Requires<[IsARM, HasV6]> {
// FIXME: add support for options other than a full system DSB
// FIXME: add encoding
}
}
-// Helper class for multiclass MemB -- for disassembly only
-class AMBI<string opc, string asm>
- : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm,
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV7]> {
- let Inst{31-20} = 0xf57;
-}
-
-multiclass MemB<bits<4> op7_4, string opc> {
-
- def st : AMBI<opc, "\tst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1110;
- }
-
- def ish : AMBI<opc, "\tish"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1011;
- }
-
- def ishst : AMBI<opc, "\tishst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b1010;
- }
-
- def nsh : AMBI<opc, "\tnsh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0111;
- }
-
- def nshst : AMBI<opc, "\tnshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0110;
- }
+// Memory Barrier Operations Variants -- for disassembly only
- def osh : AMBI<opc, "\tosh"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0011;
- }
+def memb_opt : Operand<i32> {
+ let PrintMethod = "printMemBOption";
+}
- def oshst : AMBI<opc, "\toshst"> {
- let Inst{7-4} = op7_4;
- let Inst{3-0} = 0b0010;
- }
+class AMBI<bits<4> op7_4, string opc>
+ : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, opc, "\t$opt",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-8} = 0xf57ff0;
+ let Inst{7-4} = op7_4;
}
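// Editor's note: the deleted MemB multiclass above spelled out the barrier
// option encodings that memb_opt now carries as an operand. A sketch of
// that mapping (a hypothetical stand-in for the printMemBOption print
// method; the names and bit patterns come from the removed definitions plus
// the full-system encoding used by DMBsy/DSBsy):
#include <cstdint>

static const char *memBOptionNameSketch(unsigned bits3_0) {
  switch (bits3_0) {
  case 0b1111: return "sy";     // full system (the DMBsy/DSBsy encoding)
  case 0b1110: return "st";
  case 0b1011: return "ish";
  case 0b1010: return "ishst";
  case 0b0111: return "nsh";
  case 0b0110: return "nshst";
  case 0b0011: return "osh";
  case 0b0010: return "oshst";
  default:     return "?";      // reserved / not listed in this patch
  }
}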
// These DMB variants are for disassembly only.
-defm DMB : MemB<0b0101, "dmb">;
+def DMBvar : AMBI<0b0101, "dmb">;
// These DSB variants are for disassembly only.
-defm DSB : MemB<0b0100, "dsb">;
+def DSBvar : AMBI<0b0100, "dsb">;
// ISB has only the full-system option -- for disassembly only
-def ISBsy : AMBI<"isb", ""> {
- let Inst{7-4} = 0b0110;
+def ISBsy : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+ Requires<[IsARM, HasDB]> {
+ let Inst{31-4} = 0xf57ff06;
let Inst{3-0} = 0b1111;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7f7eb980abe83..4d2f1169061ff 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -93,6 +93,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisSameAs<1, 2>]>;
+def NEONvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
@@ -100,14 +105,14 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits;
+ unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 32 && EltVal == 0);
}]>;
def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
- unsigned EltBits;
+ unsigned EltBits = 0;
uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
return (EltBits == 8 && EltVal == 0xff);
}]>;
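// Editor's note: the EltBits = 0 initializations above guard against
// reading an out-parameter that the callee might not set on every path. A
// minimal sketch of the hazard; decodeSketch is a hypothetical stand-in for
// ARM_AM::decodeNEONModImm, and the early-out path is assumed, not taken
// from the actual implementation.
#include <cstdint>

static uint64_t decodeSketch(uint64_t enc, unsigned &eltBits) {
  if (enc == 0)
    return 0;            // hypothetical path that leaves eltBits untouched
  eltBits = 32;
  return enc;
}

static bool isAllZerosSketch(uint64_t enc) {
  unsigned eltBits = 0;  // without this, the comparison below could read an
                         // indeterminate value (undefined behavior)
  uint64_t v = decodeSketch(enc, eltBits);
  return eltBits == 32 && v == 0;
}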
@@ -124,15 +129,16 @@ def nModImm : Operand<i32> {
// NEON load / store instructions
//===----------------------------------------------------------------------===//
-let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vldmia to load a Q register as a D register pair.
// This is equivalent to VLDMD except that it has a Q register operand
// instead of a pair of D registers.
def VLDMQ
- : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
+ : AXDI4<(outs QPR:$dst), (ins addrmode4:$addr, pred:$p),
IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
+ "vldm${addr:submode}${p}\t$addr, ${dst:dregpair}", "",
+ [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]>;
+let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vld1 to load a Q register as a D register pair.
// This alternative to VLDMQ allows an alignment to be specified.
// This is equivalent to VLD1q64 except that it has a Q register operand.
@@ -141,15 +147,16 @@ def VLD1q
IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
} // mayLoad = 1, neverHasSideEffects = 1
-let mayStore = 1, neverHasSideEffects = 1 in {
// Use vstmia to store a Q register as a D register pair.
// This is equivalent to VSTMD except that it has a Q register operand
// instead of a pair of D registers.
def VSTMQ
- : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
+ : AXDI4<(outs), (ins QPR:$src, addrmode4:$addr, pred:$p),
IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
+ "vstm${addr:submode}${p}\t$addr, ${src:dregpair}", "",
+ [(store (v2f64 QPR:$src), addrmode4:$addr)]>;
+let mayStore = 1, neverHasSideEffects = 1 in {
// Use vst1 to store a Q register as a D register pair.
// This alternative to VSTMQ allows an alignment to be specified.
// This is equivalent to VST1q64 except that it has a Q register operand.
@@ -160,6 +167,25 @@ def VST1q
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo
+ : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
+class VLDQWBPseudo
+ : PseudoNLdSt<(outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ "$addr.addr = $wb">;
+class VLDQQPseudo
+ : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), IIC_VST, "">;
+class VLDQQWBPseudo
+ : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VST,
+ "$addr.addr = $wb">;
+class VLDQQQQWBPseudo
+ : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ "$addr.addr = $wb, $src = $dst">;
+
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
@@ -180,6 +206,11 @@ def VLD1q16 : VLD1Q<0b0100, "16">;
def VLD1q32 : VLD1Q<0b1000, "32">;
def VLD1q64 : VLD1Q<0b1100, "64">;
+def VLD1q8Pseudo : VLDQPseudo;
+def VLD1q16Pseudo : VLDQPseudo;
+def VLD1q32Pseudo : VLDQPseudo;
+def VLD1q64Pseudo : VLDQPseudo;
+
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb),
@@ -202,6 +233,11 @@ def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
+def VLD1q8Pseudo_UPD : VLDQWBPseudo;
+def VLD1q16Pseudo_UPD : VLDQWBPseudo;
+def VLD1q32Pseudo_UPD : VLDQWBPseudo;
+def VLD1q64Pseudo_UPD : VLDQWBPseudo;
+
// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
@@ -222,6 +258,9 @@ def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
+def VLD1d64TPseudo : VLDQQPseudo;
+def VLD1d64TPseudo_UPD : VLDQQWBPseudo;
+
// ...with 4 registers (some of these are only for the disassembler):
class VLD1D4<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
@@ -244,6 +283,9 @@ def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
+def VLD1d64QPseudo : VLDQQPseudo;
+def VLD1d64QPseudo_UPD : VLDQQWBPseudo;
+
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
@@ -263,6 +305,14 @@ def VLD2q8 : VLD2Q<0b0000, "8">;
def VLD2q16 : VLD2Q<0b0100, "16">;
def VLD2q32 : VLD2Q<0b1000, "32">;
+def VLD2d8Pseudo : VLDQPseudo;
+def VLD2d16Pseudo : VLDQPseudo;
+def VLD2d32Pseudo : VLDQPseudo;
+
+def VLD2q8Pseudo : VLDQQPseudo;
+def VLD2q16Pseudo : VLDQQPseudo;
+def VLD2q32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
@@ -284,6 +334,14 @@ def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
def VLD2q32_UPD : VLD2QWB<0b1000, "32">;
+def VLD2d8Pseudo_UPD : VLDQWBPseudo;
+def VLD2d16Pseudo_UPD : VLDQWBPseudo;
+def VLD2d32Pseudo_UPD : VLDQWBPseudo;
+
+def VLD2q8Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q16Pseudo_UPD : VLDQQWBPseudo;
+def VLD2q32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (for disassembly only):
def VLD2b8 : VLD2D<0b1001, 0b0000, "8">;
def VLD2b16 : VLD2D<0b1001, 0b0100, "16">;
@@ -302,6 +360,10 @@ def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo : VLDQQPseudo;
+def VLD3d16Pseudo : VLDQQPseudo;
+def VLD3d32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
@@ -314,6 +376,10 @@ def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
+def VLD3d8Pseudo_UPD : VLDQQWBPseudo;
+def VLD3d16Pseudo_UPD : VLDQQWBPseudo;
+def VLD3d32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
def VLD3q16 : VLD3D<0b0101, 0b0100, "16">;
@@ -322,10 +388,14 @@ def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">;
def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">;
def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">;
-def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">;
-def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">;
+def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo;
// VLD4 : Vector Load (multiple 4-element structures)
class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -338,6 +408,10 @@ def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo : VLDQQPseudo;
+def VLD4d16Pseudo : VLDQQPseudo;
+def VLD4d32Pseudo : VLDQQPseudo;
+
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b10, op11_8, op7_4,
@@ -350,6 +424,10 @@ def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
+def VLD4d8Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d16Pseudo_UPD : VLDQQWBPseudo;
+def VLD4d32Pseudo_UPD : VLDQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
def VLD4q16 : VLD4D<0b0001, 0b0100, "16">;
@@ -358,10 +436,14 @@ def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">;
def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">;
def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">;
-def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">;
-def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
+def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo;
+def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo;
// VLD1LN : Vector Load (single element to one lane)
// FIXME: Not yet implemented.
@@ -486,6 +568,25 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, "">;
+class VSTQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+class VSTQQPseudo
+ : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), IIC_VST, "">;
+class VSTQQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+class VSTQQQQWBPseudo
+ : PseudoNLdSt<(outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), IIC_VST,
+ "$addr.addr = $wb">;
+
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
@@ -505,6 +606,11 @@ def VST1q16 : VST1Q<0b0100, "16">;
def VST1q32 : VST1Q<0b1000, "32">;
def VST1q64 : VST1Q<0b1100, "64">;
+def VST1q8Pseudo : VSTQPseudo;
+def VST1q16Pseudo : VSTQPseudo;
+def VST1q32Pseudo : VSTQPseudo;
+def VST1q64Pseudo : VSTQPseudo;
+
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
@@ -525,6 +631,11 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;
+def VST1q8Pseudo_UPD : VSTQWBPseudo;
+def VST1q16Pseudo_UPD : VSTQWBPseudo;
+def VST1q32Pseudo_UPD : VSTQWBPseudo;
+def VST1q64Pseudo_UPD : VSTQWBPseudo;
+
// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
@@ -547,6 +658,9 @@ def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
+def VST1d64TPseudo : VSTQQPseudo;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo;
+
// ...with 4 registers (some of these are only for the disassembler):
class VST1D4<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
@@ -570,6 +684,9 @@ def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
+def VST1d64QPseudo : VSTQQPseudo;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo;
+
// VST2 : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs),
@@ -589,6 +706,14 @@ def VST2q8 : VST2Q<0b0000, "8">;
def VST2q16 : VST2Q<0b0100, "16">;
def VST2q32 : VST2Q<0b1000, "32">;
+def VST2d8Pseudo : VSTQPseudo;
+def VST2d16Pseudo : VSTQPseudo;
+def VST2d32Pseudo : VSTQPseudo;
+
+def VST2q8Pseudo : VSTQQPseudo;
+def VST2q16Pseudo : VSTQQPseudo;
+def VST2q32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -610,6 +735,14 @@ def VST2q8_UPD : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
def VST2q32_UPD : VST2QWB<0b1000, "32">;
+def VST2d8Pseudo_UPD : VSTQWBPseudo;
+def VST2d16Pseudo_UPD : VSTQWBPseudo;
+def VST2d32Pseudo_UPD : VSTQWBPseudo;
+
+def VST2q8Pseudo_UPD : VSTQQWBPseudo;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (for disassembly only):
def VST2b8 : VST2D<0b1001, 0b0000, "8">;
def VST2b16 : VST2D<0b1001, 0b0100, "16">;
@@ -628,6 +761,10 @@ def VST3d8 : VST3D<0b0100, 0b0000, "8">;
def VST3d16 : VST3D<0b0100, 0b0100, "16">;
def VST3d32 : VST3D<0b0100, 0b1000, "32">;
+def VST3d8Pseudo : VSTQQPseudo;
+def VST3d16Pseudo : VSTQQPseudo;
+def VST3d32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -640,6 +777,10 @@ def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
+def VST3d8Pseudo_UPD : VSTQQWBPseudo;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8 : VST3D<0b0101, 0b0000, "8">;
def VST3q16 : VST3D<0b0101, 0b0100, "16">;
@@ -648,10 +789,14 @@ def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">;
def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">;
def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8Pseudo_UPD : VSTQQQQWBPseudo;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">;
-def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">;
-def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">;
+def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo;
// VST4 : Vector Store (multiple 4-element structures)
class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -664,6 +809,10 @@ def VST4d8 : VST4D<0b0000, 0b0000, "8">;
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
+def VST4d8Pseudo : VSTQQPseudo;
+def VST4d16Pseudo : VSTQQPseudo;
+def VST4d32Pseudo : VSTQQPseudo;
+
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
@@ -676,6 +825,10 @@ def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
+def VST4d8Pseudo_UPD : VSTQQWBPseudo;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo;
+
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
def VST4q16 : VST4D<0b0001, 0b0100, "16">;
@@ -684,10 +837,14 @@ def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">;
def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">;
def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8Pseudo_UPD : VSTQQQQWBPseudo;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo;
+
// ...alternate versions to be allocated odd register numbers:
-def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">;
-def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">;
-def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
+def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo;
// VST1LN : Vector Store (single element from one lane)
// FIXME: Not yet implemented.
@@ -879,6 +1036,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyD, ValueType TyQ, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
+ (ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
+ [(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
+
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -888,14 +1054,14 @@ class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src))))]>;
-// Long 2-register intrinsics (currently only used for VMOVL).
-class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs QPR:$dst),
(ins DPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src))))]>;
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src))))]>;
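// Illustrative expansion: the v8i16 VMOVLs instance plugs in TyQ = v8i16,
// TyD = v8i8 and OpNode = sext, giving
//   [(set QPR:$dst, (v8i16 (sext (v8i8 DPR:$src))))]
// so vmovl is now selected from generic sext/zext nodes.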
// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
@@ -1150,6 +1316,24 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
(ResTy (NEONvduplane (OpTy DPR_8:$src3),
imm:$lane)))))))]>;
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set DPR:$dst, (Ty (OpNode DPR:$src1,
+ (Ty (IntOp (Ty DPR:$src2), (Ty DPR:$src3))))))]>;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (Ty (OpNode QPR:$src1,
+ (Ty (IntOp (Ty QPR:$src2), (Ty QPR:$src3))))))]>;
+
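// Illustrative expansion (hand-substituted): VABAs instantiates these with
// IntOp = int_arm_neon_vabds and OpNode = add, so the v8i8 pattern reads
//   [(set DPR:$dst, (v8i8 (add DPR:$src1,
//          (v8i8 (int_arm_neon_vabds (v8i8 DPR:$src2), (v8i8 DPR:$src3))))))]
// i.e. a generic accumulate wrapped around the absolute-difference intrinsic.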
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1169,6 +1353,53 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD DPR:$src3)))))]>;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_VFP2:$src3),
+ imm:$lane))))))]>;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst),
+ (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
+ [(set QPR:$dst,
+ (OpNode (TyQ QPR:$src1),
+ (TyQ (MulOp (TyD DPR:$src2),
+ (TyD (NEONvduplane (TyD DPR_8:$src3),
+ imm:$lane))))))]>;
+
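// Illustrative expansion: VMLALs uses MulOp = NEONvmulls and OpNode = add,
// so its v4i32 instance matches
//   (add (v4i32 QPR:$src1),
//        (v4i32 (NEONvmulls (v4i16 DPR:$src2), (v4i16 DPR:$src3))))
// which lets a separate long multiply feeding an add fold into one vmlal.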
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src2),
+ (TyD DPR:$src3)))))))]>;
+
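// Illustrative expansion: VABALs uses IntOp = int_arm_neon_vabds,
// ExtOp = zext and OpNode = add, i.e. Q += zext(|a - b|). zext is correct
// even for the signed form, since an absolute difference is never negative.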
// Neon Long 3-argument intrinsic. The destination register is
// a quad-register and is also used as the first source operand register.
class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
@@ -1217,6 +1448,61 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
let isCommutable = Commutable;
}
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (OpNode (TyD DPR:$src1), (TyD DPR:$src2))))]> {
+ let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+                  (TyD (NEONvduplane (TyD DPR_VFP2:$src2), imm:$lane)))))]>;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode>
+ : N3V<op24, 1, op21_20, op11_8, 1, 0,
+ (outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ [(set QPR:$dst,
+ (TyQ (OpNode (TyD DPR:$src1),
+ (TyD (NEONvduplane (TyD DPR_8:$src2), imm:$lane)))))]>;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))),
+ (TyQ (ExtOp (TyD DPR:$src2)))))]> {
+ let isCommutable = Commutable;
+}
+
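// Illustrative expansion: VADDLs instantiates this with OpNode = add and
// ExtOp = sext, so vaddl.s16 matches
//   (add (v4i32 (sext (v4i16 DPR:$src1))),
//        (v4i32 (sext (v4i16 DPR:$src2))))
// replacing the old int_arm_neon_vaddls intrinsic with generic nodes.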
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+ bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (TyQ (ExtOp (TyD (IntOp (TyD DPR:$src1),
+ (TyD DPR:$src2))))))]> {
+ let isCommutable = Commutable;
+}
+
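// Illustrative expansion: for VABDLs the v8i16 instance reduces to
//   (v8i16 (zext (v8i8 (int_arm_neon_vabds (v8i8 DPR:$src1),
//                                          (v8i8 DPR:$src2)))))
// keeping the D-register vabd intrinsic and widening its result generically.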
// Long 3-register intrinsics.
class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
@@ -1248,14 +1534,15 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
(OpTy (NEONvduplane (OpTy DPR_8:$src2),
imm:$lane)))))]>;
-// Wide 3-register intrinsics.
-class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
- Intrinsic IntOp, bit Commutable>
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+ SDNode OpNode, SDNode ExtOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
+ [(set QPR:$dst, (OpNode (TyQ QPR:$src1),
+ (TyQ (ExtOp (TyD DPR:$src2)))))]> {
let isCommutable = Commutable;
}
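// Illustrative expansion: VADDWs uses OpNode = add and ExtOp = sext, so
// vaddw.s16 matches
//   (add (v4i32 QPR:$src1), (v4i32 (sext (v4i16 DPR:$src2))))
// extending only the narrow (D-register) operand.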
@@ -1488,6 +1775,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op6, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "16"),
+ v8i8, v8i16, OpNode>;
+ def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "32"),
+ v4i16, v4i32, OpNode>;
+ def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
+ itin, OpcodeStr, !strconcat(Dt, "64"),
+ v2i32, v2i64, OpNode>;
+}
+
// Neon Narrowing 2-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
@@ -1508,14 +1812,14 @@ multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// Neon Lengthening 2-register vector operations (currently specific to VMOVL),
// source operand element sizes of 8, 16 and 32 bits:
-multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
- string OpcodeStr, string Dt, Intrinsic IntOp> {
- def v8i16 : N2VLInt<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
- def v4i32 : N2VLInt<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>;
- def v2i64 : N2VLInt<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
- OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>;
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+ string OpcodeStr, string Dt, SDNode OpNode> {
+ def v8i16 : N2VL<op24_23, 0b00, 0b10, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode>;
+ def v4i32 : N2VL<op24_23, 0b01, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i64 : N2VL<op24_23, 0b10, 0b00, 0b00, op11_7, op6, op4, IIC_VQUNAiD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
}
@@ -1607,6 +1911,47 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, bit Commutable = 0> {
+ def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, Commutable>;
+ def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, Commutable>;
+ def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, Commutable>;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode> {
+ def v4i16 : N3VLSL16<op24, 0b01, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, OpNode>;
+ def v2i32 : N3VLSL<op24, 0b10, op11_8, itin, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, OpNode>;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLExt<op24, op23, 0b00, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VLExt<op24, op23, 0b01, op11_8, op4, itin16,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VLExt<op24, op23, 0b10, op11_8, op4, itin32,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
+}
+
// Neon Long 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1643,21 +1988,36 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
v8i16, v8i8, IntOp, Commutable>;
}
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VLIntExt<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, IntOp, ExtOp, Commutable>;
+ def v4i32 : N3VLIntExt<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, IntOp, ExtOp, Commutable>;
+ def v2i64 : N3VLIntExt<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, IntOp, ExtOp, Commutable>;
+}
+
// Neon Wide 3-register vector operations,
// source operand element sizes of 8, 16 and 32 bits:
-multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt,
- Intrinsic IntOp, bit Commutable = 0> {
- def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "8"),
- v8i16, v8i8, IntOp, Commutable>;
- def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "16"),
- v4i32, v4i16, IntOp, Commutable>;
- def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4,
- OpcodeStr, !strconcat(Dt, "32"),
- v2i64, v2i32, IntOp, Commutable>;
+multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt,
+ SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+ def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i16, v8i8, OpNode, ExtOp, Commutable>;
+ def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i32, v4i16, OpNode, ExtOp, Commutable>;
+ def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4,
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i64, v2i32, OpNode, ExtOp, Commutable>;
}
@@ -1700,6 +2060,29 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8,
mul, ShOp>;
}
+// Neon Intrinsic-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VIntOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itinD, InstrItinClass itinQ,
+ string OpcodeStr, string Dt, Intrinsic IntOp,
+ SDNode OpNode> {
+ // 64-bit vector types.
+ def v8i8 : N3VDIntOp<op24, op23, 0b00, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, IntOp, OpNode>;
+ def v4i16 : N3VDIntOp<op24, op23, 0b01, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp, OpNode>;
+ def v2i32 : N3VDIntOp<op24, op23, 0b10, op11_8, op4, itinD,
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp, OpNode>;
+
+ // 128-bit vector types.
+ def v16i8 : N3VQIntOp<op24, op23, 0b00, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, IntOp, OpNode>;
+ def v8i16 : N3VQIntOp<op24, op23, 0b01, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, IntOp, OpNode>;
+ def v4i32 : N3VQIntOp<op24, op23, 0b10, op11_8, op4, itinQ,
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, IntOp, OpNode>;
+}
+
// Neon 3-argument intrinsics,
// element sizes of 8, 16 and 32 bits:
multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
@@ -1723,6 +2106,29 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
}
+// Neon Long Multiply-Op vector operations,
+// element sizes of 8, 16 and 32 bits:
+multiclass N3VLMulOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin16, InstrItinClass itin32,
+ string OpcodeStr, string Dt, SDNode MulOp,
+ SDNode OpNode> {
+ def v8i16 : N3VLMulOp<op24, op23, 0b00, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "8"), v8i16, v8i8, MulOp, OpNode>;
+ def v4i32 : N3VLMulOp<op24, op23, 0b01, op11_8, op4, itin16, OpcodeStr,
+ !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i64 : N3VLMulOp<op24, op23, 0b10, op11_8, op4, itin32, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
+multiclass N3VLMulOpSL_HS<bit op24, bits<4> op11_8, string OpcodeStr,
+ string Dt, SDNode MulOp, SDNode OpNode> {
+ def v4i16 : N3VLMulOpSL16<op24, 0b01, op11_8, IIC_VMACi16D, OpcodeStr,
+                    !strconcat(Dt, "16"), v4i32, v4i16, MulOp, OpNode>;
+ def v2i32 : N3VLMulOpSL<op24, 0b10, op11_8, IIC_VMACi32D, OpcodeStr,
+ !strconcat(Dt, "32"), v2i64, v2i32, MulOp, OpNode>;
+}
+
// Neon Long 3-argument intrinsics.
// First with only element sizes of 16 and 32 bits:
@@ -1752,6 +2158,21 @@ multiclass N3VLInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>;
}
+// ....with explicit extend (VABAL).
+multiclass N3VLIntExtOp_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> {
+ def v8i16 : N3VLIntExtOp<op24, op23, 0b00, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8,
+ IntOp, ExtOp, OpNode>;
+ def v4i32 : N3VLIntExtOp<op24, op23, 0b01, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16,
+ IntOp, ExtOp, OpNode>;
+ def v2i64 : N3VLIntExtOp<op24, op23, 0b10, op11_8, op4, itin,
+ OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32,
+ IntOp, ExtOp, OpNode>;
+}
+
// Neon 2-register vector intrinsics,
// element sizes of 8, 16 and 32 bits:
@@ -1996,13 +2417,13 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32",
def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32",
v4f32, v4f32, fadd, 1>;
// VADDL : Vector Add Long (Q = D + D)
-defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "s", int_arm_neon_vaddls, 1>;
-defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
- "vaddl", "u", int_arm_neon_vaddlu, 1>;
+defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "s", add, sext, 1>;
+defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vaddl", "u", add, zext, 1>;
// VADDW : Vector Add Wide (Q = Q + D)
-defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
-defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
+defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>;
+defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>;
// VHADD : Vector Halving Add
defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
@@ -2113,16 +2534,14 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
-defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "s", int_arm_neon_vmulls, 1>;
-defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
- "vmull", "u", int_arm_neon_vmullu, 1>;
+defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "s", NEONvmulls, 1>;
+defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D,
+ "vmull", "u", NEONvmullu, 1>;
def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8",
v8i16, v8i8, int_arm_neon_vmullp, 1>;
-defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s",
- int_arm_neon_vmulls>;
-defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u",
- int_arm_neon_vmullu>;
+defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>;
+defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>;
// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D)
defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D,
@@ -2172,13 +2591,13 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
-defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "s", NEONvmulls, add>;
+defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlal", "u", NEONvmullu, add>;
-defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>;
-defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>;
+defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>;
+defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>;
// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D)
defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2224,13 +2643,13 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
-defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
- "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D,
+ "vmlsl", "u", NEONvmullu, sub>;
-defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>;
-defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>;
+defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>;
+defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>;
// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D)
defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D,
@@ -2247,13 +2666,13 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
v4f32, v4f32, fsub, 0>;
// VSUBL : Vector Subtract Long (Q = D - D)
-defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "s", int_arm_neon_vsubls, 1>;
-defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
- "vsubl", "u", int_arm_neon_vsublu, 1>;
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+ "vsubl", "u", sub, zext, 0>;
// VSUBW : Vector Subtract Wide (Q = Q - D)
-defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
-defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
// VHSUB : Vector Halving Subtract
defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
@@ -2469,32 +2888,32 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "s", int_arm_neon_vabds, 0>;
+ "vabd", "s", int_arm_neon_vabds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabd", "u", int_arm_neon_vabdu, 0>;
+ "vabd", "u", int_arm_neon_vabdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
- "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
- "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
+ "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
-defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "s", int_arm_neon_vabdls, 0>;
-defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q,
- "vabdl", "u", int_arm_neon_vabdlu, 0>;
+defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "s", int_arm_neon_vabds, zext, 1>;
+defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
+ "vabdl", "u", int_arm_neon_vabdu, zext, 1>;
// VABA : Vector Absolute Difference and Accumulate
-defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "s", int_arm_neon_vabas>;
-defm VABAu : N3VInt3_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
- "vaba", "u", int_arm_neon_vabau>;
+defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "s", int_arm_neon_vabds, add>;
+defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
+ "vaba", "u", int_arm_neon_vabdu, add>;
// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
-defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "s", int_arm_neon_vabals>;
-defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, IIC_VABAD, IIC_VABAD,
- "vabal", "u", int_arm_neon_vabalu>;
+defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
+ "vabal", "s", int_arm_neon_vabds, zext, add>;
+defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
+ "vabal", "u", int_arm_neon_vabdu, zext, add>;
// Vector Maximum and Minimum.
@@ -3113,8 +3532,8 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
// VMOVN : Vector Narrowing Move
-defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
- "vmovn", "i", int_arm_neon_vmovn>;
+defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
+ "vmovn", "i", trunc>;
// VQMOVN : Vector Saturating Narrowing Move
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
"vqmovn", "s", int_arm_neon_vqmovns>;
@@ -3123,10 +3542,8 @@ defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD,
defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD,
"vqmovun", "s", int_arm_neon_vqmovnsu>;
// VMOVL : Vector Lengthening Move
-defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s",
- int_arm_neon_vmovls>;
-defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u",
- int_arm_neon_vmovlu>;
+defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>;
+defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>;
// Vector Conversions.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index bc0790dccbb5f..a13ff12327491 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -221,9 +221,13 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
// ADD rd, sp, #imm8
+// This is rematerializable, which is particularly useful for taking the
+// address of locals.
+let isReMaterializable = 1 in {
def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi,
"add\t$dst, $sp, $rhs", []>,
T1Encoding<{1,0,1,0,1,?}>; // A6.2 & A8.6.8
+}
// ADD sp, sp, #imm7
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi,
@@ -251,19 +255,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
let Inst{2-0} = 0b101;
}
-// Pseudo instruction that will expand into a tSUBspi + a copy.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $rhs", []>;
-
-def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- NoItinerary, "${:comment} add\t$dst, $rhs", []>;
-
-let Defs = [CPSR] in
-def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- NoItinerary, "${:comment} and\t$dst, $rhs", []>;
-} // usesCustomInserter
-
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
//
@@ -378,7 +369,7 @@ let isBranch = 1, isTerminator = 1 in {
def tBR_JTr : T1JTI<(outs),
(ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target\n\t.align\t2\n$jt",
+ IIC_Br, "mov\tpc, $target\n\t.align\t2$jt",
[(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]>,
Encoding16 {
let Inst{15-7} = 0b010001101;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index bbe675e81ab1d..6ba0a44be4700 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -32,7 +32,7 @@ def t2_so_reg : Operand<i32>, // reg imm
ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
[shl,srl,sra,rotr]> {
let PrintMethod = "printT2SOOperand";
- let MIOperandInfo = (ops GPR, i32imm);
+ let MIOperandInfo = (ops rGPR, i32imm);
}
// t2_so_imm_not_XFORM - Return the complement of a t2_so_imm value
@@ -51,10 +51,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// represented in the imm field in the same 12-bit form that they are encoded
// into t2_so_imm instructions: the 8-bit immediate is the least significant
// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
-def t2_so_imm : Operand<i32>,
- PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
-}]>;
+def t2_so_imm : Operand<i32>, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]>;
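// Worked example of the 12-bit form described above (the control codes are
// the architectural Thumb-2 modified-immediate values, taken from the ARM
// ARM rather than from this patch): 0x000000AB is held as 0x0AB (control
// 0b0000, imm8 = 0xAB), while the splat 0x00AB00AB is held as 0x1AB
// (control 0b0001, imm8 = 0xAB).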
// t2_so_imm_not - Match an immediate that is a complement
// of a t2_so_imm.
@@ -162,7 +159,7 @@ def t2am_imm8s4_offset : Operand<i32> {
def t2addrmode_so_reg : Operand<i32>,
ComplexPattern<i32, 3, "SelectT2AddrModeSoReg", []> {
let PrintMethod = "printT2AddrModeSoRegOperand";
- let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm);
+ let MIOperandInfo = (ops GPR:$base, rGPR:$offsreg, i32imm:$offsimm);
}
@@ -176,9 +173,9 @@ def t2addrmode_so_reg : Operand<i32>,
multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Cheap = 0, bit ReMat = 0> {
// shifted imm
- def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+ def i : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_imm:$src))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$src))]> {
let isAsCheapAsAMove = Cheap;
let isReMaterializable = ReMat;
let Inst{31-27} = 0b11110;
@@ -189,9 +186,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ def r : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -202,9 +199,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
+ def s : T2sI<(outs rGPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -217,11 +214,11 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0, string wide =""> {
+ bit Commutable = 0, string wide = ""> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -229,9 +226,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -242,9 +239,9 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, !strconcat(wide, "\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -259,23 +256,35 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
T2I_bin_irs<opcod, opc, opnode, Commutable, ".w">;
-/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
-/// reversed. It doesn't define the 'rr' form since it's handled by its
-/// T2I_bin_irs counterpart.
-multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> {
+/// T2I_rbin_irs - Same as T2I_bin_irs except the order of operands is
+/// reversed. The 'rr' form is only defined for the disassembler; for codegen
+/// it is equivalent to the T2I_bin_irs counterpart.
+multiclass T2I_rbin_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
opc, ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
let Inst{20} = ?; // The S bit.
let Inst{15} = 0;
}
+ // register
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, rGPR:$lhs), IIC_iALUr,
+ opc, "\t$dst, $rhs, $lhs",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-25} = 0b01;
+ let Inst{24-21} = opcod;
+ let Inst{20} = ?; // The S bit.
+ let Inst{14-12} = 0b000; // imm3
+ let Inst{7-6} = 0b00; // imm2
+ let Inst{5-4} = 0b00; // type
+ }
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
opc, "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -289,9 +298,9 @@ let Defs = [CPSR] in {
multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -299,9 +308,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -312,9 +321,9 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
!strconcat(opc, "s"), ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -328,9 +337,12 @@ multiclass T2I_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ // The register-immediate version is re-materializable. This is useful
+ // in particular for taking the address of a local.
+ let isReMaterializable = 1 in {
+ def ri : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24} = 1;
@@ -338,10 +350,11 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{20} = 0; // The S bit.
let Inst{15} = 0;
}
+ }
// 12-bit imm
- def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
+ def ri12 : T2I<(outs rGPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALUi,
!strconcat(opc, "w"), "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24} = 0;
@@ -350,9 +363,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, rGPR:$rhs))]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -364,9 +377,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
+ [(set rGPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24} = 1;
@@ -382,9 +395,9 @@ let Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -393,9 +406,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -407,9 +420,9 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -423,9 +436,9 @@ let Defs = [CPSR] in {
multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
// shifted imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_imm:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -434,9 +447,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]>,
Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
@@ -448,9 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ def rs : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ [(set rGPR:$dst, (opnode rGPR:$lhs, t2_so_reg:$rhs))]>,
Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -461,13 +474,14 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
}
}
-/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit.
+/// T2I_rbin_s_is - Same as T2I_rbin_irs except it sets the 's' bit. The
+/// register version is not needed, since this is used only for codegen.
let Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ def ri : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
!strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_imm:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -475,9 +489,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{15} = 0;
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ def rs : T2I<(outs rGPR:$dst), (ins rGPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
!strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode t2_so_reg:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -490,18 +504,18 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// rotate operation that produces a value.
multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
// 5-bit imm
- def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
+ def ri : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, i32imm:$rhs), IIC_iMOVsi,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, imm1_31:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-21} = 0b010010;
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod;
}
// register
- def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iMOVsr,
+ def rr : T2sI<(outs rGPR:$dst), (ins rGPR:$lhs, rGPR:$rhs), IIC_iMOVsr,
opc, ".w\t$dst, $lhs, $rhs",
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+ [(set rGPR:$dst, (opnode rGPR:$lhs, rGPR:$rhs))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-21} = opcod;
@@ -513,7 +527,7 @@ multiclass T2I_sh_ir<bits<2> opcod, string opc, PatFrag opnode> {
/// T2I_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
/// patterns. Similar to T2I_bin_irs except the instruction does not produce
/// an explicit result; it only implicitly sets CPSR.
-let Defs = [CPSR] in {
+let isCompare = 1, Defs = [CPSR] in {
multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iCMPi,
@@ -527,9 +541,9 @@ multiclass T2I_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{11-8} = 0b1111; // Rd
}
// register
- def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs), IIC_iCMPr,
+ def rr : T2I<(outs), (ins GPR:$lhs, rGPR:$rhs), IIC_iCMPr,
opc, ".w\t$lhs, $rhs",
- [(opnode GPR:$lhs, GPR:$rhs)]> {
+ [(opnode GPR:$lhs, rGPR:$rhs)]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -639,9 +653,9 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set rGPR:$dst, (opnode rGPR:$src))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -650,9 +664,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -665,9 +679,9 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
+ [(set rGPR:$dst, (opnode rGPR:$src))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -677,9 +691,9 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ [(set rGPR:$dst, (opnode (rotr rGPR:$src, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -694,7 +708,7 @@ multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier; no
// pattern is supported yet.
multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ def r : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
opc, "\t$dst, $src", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -704,7 +718,7 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ def r_rot : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -719,9 +733,9 @@ multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
- def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS",
- [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ [(set rGPR:$dst, (opnode rGPR:$LHS, rGPR:$RHS))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -730,10 +744,10 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
- [(set GPR:$dst, (opnode GPR:$LHS,
- (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ [(set rGPR:$dst, (opnode rGPR:$LHS,
+ (rotr rGPR:$RHS, rot_imm:$rot)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -747,7 +761,7 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
// DO variant - disassembly only, no pattern
multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
- def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ def rr : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -756,7 +770,7 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : T2I<(outs rGPR:$dst), (ins rGPR:$LHS, rGPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -779,8 +793,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
// assembler.
let neverHasSideEffects = 1 in {
let isReMaterializable = 1 in
-def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #$label", []> {
+def t2LEApcrel : T2XI<(outs rGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
+ "adr${p}.w\t$dst, #$label", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -790,9 +804,9 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
let Inst{15} = 0;
}
} // neverHasSideEffects
-def t2LEApcrelJT : T2XI<(outs GPR:$dst),
+def t2LEApcrelJT : T2XI<(outs rGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
- "adr$p.w\t$dst, #${label}_${id}", []> {
+ "adr${p}.w\t$dst, #${label}_${id}", []> {
let Inst{31-27} = 0b11110;
let Inst{25-24} = 0b10;
// Inst{23:21} = '11' (add = FALSE) or '00' (add = TRUE)
@@ -866,9 +880,9 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
}
// Signed and unsigned division on v7-M
-def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+def t2SDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
"sdiv", "\t$dst, $a, $b",
- [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>,
+ [(set rGPR:$dst, (sdiv rGPR:$a, rGPR:$b))]>,
Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
@@ -877,9 +891,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+def t2UDIV : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iALUi,
"udiv", "\t$dst, $a, $b",
- [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>,
+ [(set rGPR:$dst, (udiv rGPR:$a, rGPR:$b))]>,
Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
@@ -888,17 +902,6 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
let Inst{7-4} = 0b1111;
}
-// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
-let usesCustomInserter = 1 in { // Expanded after instruction selection.
-def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>;
-def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>;
-def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>;
-} // usesCustomInserter
-
-
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
@@ -917,10 +920,10 @@ defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
-def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
+def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
(ins t2addrmode_imm8s4:$addr),
IIC_iLoadi, "ldrd", "\t$dst1, $addr", []>;
-def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
+def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs rGPR:$dst1, rGPR:$dst2),
(ins i32imm:$addr), IIC_iLoadi,
"ldrd", "\t$dst1, $addr", []> {
let Inst{19-16} = 0b1111; // Rn
@@ -967,6 +970,11 @@ def : T2Pat<(extloadi16 t2addrmode_so_reg:$addr),
def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
(t2LDRHpci tconstpool:$addr)>;
+// FIXME: The destination register of the loads and stores can't be PC, but
+// can be SP. We need another regclass (similar to rGPR) to represent
+// that. Not a pressing issue since these are selected manually,
+// not via pattern.
+
// Indexed loads
let mayLoad = 1, neverHasSideEffects = 1 in {
def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
@@ -1286,9 +1294,9 @@ def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
// AddedComplexity to ensure isel tries t2MOVi before t2MOVi16.
let isReMaterializable = 1, isAsCheapAsAMove = 1, AddedComplexity = 1 in
-def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
+def t2MOVi : T2sI<(outs rGPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
"mov", ".w\t$dst, $src",
- [(set GPR:$dst, t2_so_imm:$src)]> {
+ [(set rGPR:$dst, t2_so_imm:$src)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = 0b0010;
@@ -1298,9 +1306,9 @@ def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src), IIC_iMOVi,
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+def t2MOVi16 : T2I<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, $src",
- [(set GPR:$dst, imm0_65535:$src)]> {
+ [(set rGPR:$dst, imm0_65535:$src)]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0010;
@@ -1309,10 +1317,10 @@ def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
}
let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
+def t2MOVTi16 : T2I<(outs rGPR:$dst), (ins rGPR:$src, i32imm:$imm), IIC_iMOVi,
"movt", "\t$dst, $imm",
- [(set GPR:$dst,
- (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]> {
+ [(set rGPR:$dst,
+ (or (and rGPR:$src, 0xffff), lo16AllZero:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-21} = 0b0110;
@@ -1320,7 +1328,7 @@ def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm), IIC_iMOVi,
let Inst{15} = 0;
}
-def : T2Pat<(or GPR:$src, 0xffff0000), (t2MOVTi16 GPR:$src, 0xffff)>;
+def : T2Pat<(or rGPR:$src, 0xffff0000), (t2MOVTi16 rGPR:$src, 0xffff)>;
//===----------------------------------------------------------------------===//
// Extend Instructions.
@@ -1352,10 +1360,14 @@ defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
-def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
-def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
+// FIXME: This pattern incorrectly assumes the shl operator is a rotate.
+// The transformation should probably be done as a combiner action
+// instead, so that it can also check for the upper eight bits of the
+// source being masked back into the lower eight bits of the result.
+//def : T2Pat<(and (shl rGPR:$Src, (i32 8)), 0xFF00FF),
+// (t2UXTB16r_rot rGPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
+def : T2Pat<(and (srl rGPR:$Src, (i32 8)), 0xFF00FF),
+ (t2UXTB16r_rot rGPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
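// Worked example of the FIXME above: for $Src = 0xAA000000,
//   (and (shl $Src, (i32 8)), 0xFF00FF) evaluates to 0x00000000, but
//   uxtb16 with ror #24 computes (and (rotr $Src, 24), 0xFF00FF) = 0x000000AA,
// so the shl-based pattern would select an instruction with the wrong
// semantics whenever the top byte of the source is nonzero.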
defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -1389,7 +1401,7 @@ defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// RSB
-defm t2RSB : T2I_rbin_is <0b1110, "rsb",
+defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
@@ -1409,18 +1421,18 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
let AddedComplexity = 1 in
-def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm),
- (t2SUBSri GPR:$src, imm0_255_neg:$imm)>;
-def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm),
- (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, imm0_255_neg:$imm),
+ (t2SUBSri rGPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri rGPR:$src, t2_so_imm_neg:$imm)>;
// The with-carry-in form matches bitwise not instead of the negation.
// Effectively, the inverse interpretation of the carry flag already accounts
// for part of the negation.
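// Sketch of the identity involved: sbc computes a - b - !carry, which equals
// a + ~b + carry by two's complement, so matching adde(a, ~imm) here is exact
// once the carry is read as "no borrow".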
let AddedComplexity = 1 in
-def : T2Pat<(adde GPR:$src, imm0_255_not:$imm),
- (t2SBCSri GPR:$src, imm0_255_not:$imm)>;
-def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm),
- (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm),
+ (t2SBCSri rGPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm),
+ (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>;
// Select Bytes -- for disassembly only
@@ -1437,9 +1449,10 @@ def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
// And Miscellaneous operations -- for disassembly only
-class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
- : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc,
- "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> {
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pat = [/* For disassembly only; pattern left blank */]>
+ : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), NoItinerary, opc,
+ "\t$dst, $a, $b", pat> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0101;
let Inst{22-20} = op22_20;
@@ -1449,14 +1462,16 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
// Saturating add/subtract -- for disassembly only
-def t2QADD : T2I_pam<0b000, 0b1000, "qadd">;
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd",
+ [(set rGPR:$dst, (int_arm_qadd rGPR:$a, rGPR:$b))]>;
def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">;
def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">;
def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
-def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub",
+ [(set rGPR:$dst, (int_arm_qsub rGPR:$a, rGPR:$b))]>;
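// Illustrative semantics: these intrinsics saturate instead of wrapping,
// e.g. qadd(0x7FFFFFFF, 1) yields 0x7FFFFFFF rather than overflowing to
// 0x80000000.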
def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
@@ -1498,37 +1513,27 @@ def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
-def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b),
NoItinerary, "usad8", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8",
+def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), NoItinerary, "usada8",
"\t$dst, $a, $b, $acc", []>;
// Signed/Unsigned saturate -- for disassembly only
-def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1100;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2SSAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "ssat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
-def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+def t2SSAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
"ssat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -1540,27 +1545,16 @@ def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
let Inst{7-6} = 0b00; // imm2 = '00'
}
-def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
- [/* For disassembly only; pattern left blank */]> {
- let Inst{31-27} = 0b11110;
- let Inst{25-22} = 0b1110;
- let Inst{20} = 0;
- let Inst{15} = 0;
- let Inst{21} = 0; // sh = '0'
-}
-
-def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
- NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
- [/* For disassembly only; pattern left blank */]> {
+def t2USAT: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a, shift_imm:$sh),
+ NoItinerary, "usat", "\t$dst, $bit_pos, $a$sh",
+ [/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
let Inst{15} = 0;
- let Inst{21} = 1; // sh = '1'
}
-def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+def t2USAT16: T2I<(outs rGPR:$dst), (ins i32imm:$bit_pos, rGPR:$a), NoItinerary,
"usat16", "\t$dst, $bit_pos, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -1572,6 +1566,9 @@ def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
let Inst{7-6} = 0b00; // imm2 = '00'
}
+def : T2Pat<(int_arm_ssat GPR:$a, imm:$pos), (t2SSAT imm:$pos, GPR:$a, 0)>;
+def : T2Pat<(int_arm_usat GPR:$a, imm:$pos), (t2USAT imm:$pos, GPR:$a, 0)>;
+
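// Illustrative semantics: ssat with bit position 8 clamps to the signed
// range [-128, 127]; usat with bit position 8 clamps to the unsigned range
// [0, 255].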
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
//
@@ -1582,9 +1579,9 @@ defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>;
defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
let Uses = [CPSR] in {
-def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVrx : T2sI<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"rrx", "\t$dst, $src",
- [(set GPR:$dst, (ARMrrx GPR:$src))]> {
+ [(set rGPR:$dst, (ARMrrx rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1596,9 +1593,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
}
let Defs = [CPSR] in {
-def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVsrl_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"lsrs", ".w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
+ [(set rGPR:$dst, (ARMsrl_flag rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1609,9 +1606,9 @@ def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+def t2MOVsra_flag : T2I<(outs rGPR:$dst), (ins rGPR:$src), IIC_iMOVsi,
"asrs", ".w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
+ [(set rGPR:$dst, (ARMsra_flag rGPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1638,10 +1635,13 @@ defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+defm t2ANDS : T2I_bin_s_irs<0b0000, "and",
+ BinOpFrag<(ARMand node:$LHS, node:$RHS)>, 1>;
+
let Constraints = "$src = $dst" in
-def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def t2BFC : T2I<(outs rGPR:$dst), (ins rGPR:$src, bf_inv_mask_imm:$imm),
IIC_iUNAsi, "bfc", "\t$dst, $imm",
- [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]> {
+ [(set rGPR:$dst, (and rGPR:$src, bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
@@ -1649,7 +1649,7 @@ def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
let Inst{15} = 0;
}
-def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+def t2SBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
IIC_iALUi, "sbfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -1657,7 +1657,7 @@ def t2SBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
let Inst{15} = 0;
}
-def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
+def t2UBFX: T2I<(outs rGPR:$dst), (ins rGPR:$src, imm0_31:$lsb, imm0_31:$width),
IIC_iALUi, "ubfx", "\t$dst, $src, $lsb, $width", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -1666,10 +1666,12 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
}
// A8.6.18 BFI - Bitfield insert (Encoding T1)
-// Added for disassembler with the pattern field purposely left blank.
-// FIXME: Utilize this instruction in codgen.
-def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> {
+let Constraints = "$src = $dst" in
+def t2BFI : T2I<(outs rGPR:$dst),
+ (ins rGPR:$src, rGPR:$val, bf_inv_mask_imm:$imm),
+ IIC_iALUi, "bfi", "\t$dst, $val, $imm",
+ [(set rGPR:$dst, (ARMbfi rGPR:$src, rGPR:$val,
+ bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
@@ -1677,19 +1679,20 @@ def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
}
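// Illustrative semantics (hypothetical operands): bfi r0, r1, #8, #4 copies
// r1[3:0] into r0[11:8] and leaves the other bits of r0 unchanged, which is
// the insert-under-inverted-mask behavior the ARMbfi node models.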
defm t2ORN : T2I_bin_irs<0b0011, "orn", BinOpFrag<(or node:$LHS,
- (not node:$RHS))>>;
+ (not node:$RHS))>, 0, "">;
// Preferred over t2EORri ra, rb, -1 because mvn has a 16-bit version
let AddedComplexity = 1 in
defm t2MVN : T2I_un_irs <0b0011, "mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
-def : T2Pat<(and GPR:$src, t2_so_imm_not:$imm),
- (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(and rGPR:$src, t2_so_imm_not:$imm),
+ (t2BICri rGPR:$src, t2_so_imm_not:$imm)>;
// FIXME: Disable this pattern on Darwin to workaround an assembler bug.
-def : T2Pat<(or GPR:$src, t2_so_imm_not:$imm),
- (t2ORNri GPR:$src, t2_so_imm_not:$imm)>,
+def : T2Pat<(or rGPR:$src, t2_so_imm_not:$imm),
+ (t2ORNri rGPR:$src, t2_so_imm_not:$imm)>,
Requires<[IsThumb2]>;
def : T2Pat<(t2_so_imm_not:$src),
@@ -1699,9 +1702,9 @@ def : T2Pat<(t2_so_imm_not:$src),
// Multiply Instructions.
//
let isCommutable = 1 in
-def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2MUL: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"mul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mul GPR:$a, GPR:$b))]> {
+ [(set rGPR:$dst, (mul rGPR:$a, rGPR:$b))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1709,9 +1712,9 @@ def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLA: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"mla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]> {
+ [(set rGPR:$dst, (add (mul rGPR:$a, rGPR:$b), rGPR:$c))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1719,9 +1722,9 @@ def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2MLS: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"mls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]> {
+ [(set rGPR:$dst, (sub rGPR:$c, (mul rGPR:$a, rGPR:$b)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
@@ -1732,7 +1735,8 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
// Extra precision multiplies with low / high results
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2SMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"smull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1740,7 +1744,8 @@ def t2SMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
let Inst{7-4} = 0b0000;
}
-def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
+def t2UMULL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMUL64,
"umull", "\t$ldst, $hdst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1750,7 +1755,8 @@ def t2UMULL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMUL64,
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2SMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"smlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1758,7 +1764,8 @@ def t2SMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
let Inst{7-4} = 0b0000;
}
-def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2UMLAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umlal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1766,7 +1773,8 @@ def t2UMLAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
let Inst{7-4} = 0b0000;
}
-def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
+def t2UMAAL : T2I<(outs rGPR:$ldst, rGPR:$hdst),
+ (ins rGPR:$a, rGPR:$b), IIC_iMAC64,
"umaal", "\t$ldst, $hdst, $a, $b", []>{
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0111;
@@ -1778,9 +1786,9 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
// Rounding variants of the below included for disassembly only
// Most significant word multiply
-def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2SMMUL : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"smmul", "\t$dst, $a, $b",
- [(set GPR:$dst, (mulhs GPR:$a, GPR:$b))]> {
+ [(set rGPR:$dst, (mulhs rGPR:$a, rGPR:$b))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1788,7 +1796,7 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+def t2SMMULR : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
"smmulr", "\t$dst, $a, $b", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1797,9 +1805,9 @@ def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLA : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmla", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> {
+ [(set rGPR:$dst, (add (mulhs rGPR:$a, rGPR:$b), rGPR:$c))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -1807,7 +1815,7 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLAR: T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlar", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1816,9 +1824,9 @@ def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
}
-def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLS: T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmls", "\t$dst, $a, $b, $c",
- [(set GPR:$dst, (sub GPR:$c, (mulhs GPR:$a, GPR:$b)))]> {
+ [(set rGPR:$dst, (sub rGPR:$c, (mulhs rGPR:$a, rGPR:$b)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -1826,7 +1834,7 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
-def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+def t2SMMLSR:T2I <(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$c), IIC_iMAC32,
"smmlsr", "\t$dst, $a, $b, $c", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
@@ -1836,10 +1844,10 @@ def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
}
multiclass T2I_smul<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "bb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16)))]> {
+ [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
+ (sext_inreg rGPR:$b, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1848,10 +1856,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "bt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16))))]> {
+ [(set rGPR:$dst, (opnode (sext_inreg rGPR:$a, i16),
+ (sra rGPR:$b, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1860,10 +1868,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "tb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16)))]> {
+ [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
+ (sext_inreg rGPR:$b, i16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1872,10 +1880,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL32,
!strconcat(opc, "tt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16))))]> {
+ [(set rGPR:$dst, (opnode (sra rGPR:$a, (i32 16)),
+ (sra rGPR:$b, (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1884,10 +1892,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16)))]> {
+ [(set rGPR:$dst, (sra (opnode rGPR:$a,
+ (sext_inreg rGPR:$b, i16)), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1896,10 +1904,10 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL16,
+ def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$dst, $a, $b",
- [(set GPR:$dst, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16)))]> {
+ [(set rGPR:$dst, (sra (opnode rGPR:$a,
+ (sra rGPR:$b, (i32 16))), (i32 16)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1911,11 +1919,11 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
multiclass T2I_smla<string opc, PatFrag opnode> {
- def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def BB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "bb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc,
- (opnode (sext_inreg GPR:$a, i16),
- (sext_inreg GPR:$b, i16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc,
+ (opnode (sext_inreg rGPR:$a, i16),
+ (sext_inreg rGPR:$b, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1924,10 +1932,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def BT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sext_inreg rGPR:$a, i16),
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1936,10 +1944,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b01;
}
- def TB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def TB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sext_inreg GPR:$b, i16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
+ (sext_inreg rGPR:$b, i16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1948,10 +1956,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b10;
}
- def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def TT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16)))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (opnode (sra rGPR:$a, (i32 16)),
+ (sra rGPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1960,10 +1968,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b11;
}
- def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def WB : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
+ (sext_inreg rGPR:$b, i16)), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1972,10 +1980,10 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
let Inst{5-4} = 0b00;
}
- def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
+ def WT : T2I<(outs rGPR:$dst), (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
- [(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16))))]> {
+ [(set rGPR:$dst, (add rGPR:$acc, (sra (opnode rGPR:$a,
+ (sra rGPR:$b, (i32 16))), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1989,61 +1997,61 @@ defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
-def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
-def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
[/* For disassembly only; pattern left blank */]>;
// Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
// These are for disassembly only.
-def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
+def t2SMUAD: T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
+def t2SMUADX:T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
+def t2SMUSD: T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
+def t2SMUSDX:T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst), (ins rGPR:$a, rGPR:$b),
+ IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
let Inst{15-12} = 0b1111;
}
-def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad",
+def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlad",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx",
+def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smladx",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd",
+def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsd",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst),
- (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx",
+def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs rGPR:$dst),
+ (ins rGPR:$a, rGPR:$b, rGPR:$acc), IIC_iMAC32, "smlsdx",
"\t$dst, $a, $b, $acc", []>;
-def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald",
+def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlald",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx",
+def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlaldx",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld",
+def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsld",
"\t$ldst, $hdst, $a, $b", []>;
-def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst),
- (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx",
+def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs rGPR:$ldst,rGPR:$hdst),
+ (ins rGPR:$a,rGPR:$b), IIC_iMAC64, "smlsldx",
"\t$ldst, $hdst, $a, $b", []>;
//===----------------------------------------------------------------------===//
@@ -2061,35 +2069,35 @@ class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
let Inst{5-4} = op2;
}
-def t2CLZ : T2I_misc<0b11, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "clz", "\t$dst, $src", [(set GPR:$dst, (ctlz GPR:$src))]>;
+def t2CLZ : T2I_misc<0b11, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+ "clz", "\t$dst, $src", [(set rGPR:$dst, (ctlz rGPR:$src))]>;
-def t2RBIT : T2I_misc<0b01, 0b10, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2RBIT : T2I_misc<0b01, 0b10, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"rbit", "\t$dst, $src",
- [(set GPR:$dst, (ARMrbit GPR:$src))]>;
+ [(set rGPR:$dst, (ARMrbit rGPR:$src))]>;
-def t2REV : T2I_misc<0b01, 0b00, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
- "rev", ".w\t$dst, $src", [(set GPR:$dst, (bswap GPR:$src))]>;
+def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
+ "rev", ".w\t$dst, $src", [(set rGPR:$dst, (bswap rGPR:$src))]>;
-def t2REV16 : T2I_misc<0b01, 0b01, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"rev16", ".w\t$dst, $src",
- [(set GPR:$dst,
- (or (and (srl GPR:$src, (i32 8)), 0xFF),
- (or (and (shl GPR:$src, (i32 8)), 0xFF00),
- (or (and (srl GPR:$src, (i32 8)), 0xFF0000),
- (and (shl GPR:$src, (i32 8)), 0xFF000000)))))]>;
+ [(set rGPR:$dst,
+ (or (and (srl rGPR:$src, (i32 8)), 0xFF),
+ (or (and (shl rGPR:$src, (i32 8)), 0xFF00),
+ (or (and (srl rGPR:$src, (i32 8)), 0xFF0000),
+ (and (shl rGPR:$src, (i32 8)), 0xFF000000)))))]>;
-def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$dst), (ins rGPR:$src), IIC_iUNAr,
"revsh", ".w\t$dst, $src",
- [(set GPR:$dst,
+ [(set rGPR:$dst,
(sext_inreg
- (or (srl (and GPR:$src, 0xFF00), (i32 8)),
- (shl GPR:$src, (i32 8))), i16))]>;
+ (or (srl (and rGPR:$src, 0xFF00), (i32 8)),
+ (shl rGPR:$src, (i32 8))), i16))]>;
-def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
- (and (shl GPR:$src2, (i32 imm:$shamt)),
+def t2PKHBT : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2$sh",
+ [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF),
+ (and (shl rGPR:$src2, lsl_amt:$sh),
0xFFFF0000)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
@@ -2100,18 +2108,20 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
}
// Alternate cases for PKHBT where identities eliminate some nodes.
-def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
- (t2PKHBT GPR:$src1, GPR:$src2, 0)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (and rGPR:$src2, 0xFFFF0000)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, 0)>,
Requires<[HasT2ExtractPack]>;
-def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF), (shl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHBT rGPR:$src1, rGPR:$src2, (lsl_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
-def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
- [(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
- (and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]>,
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def t2PKHTB : T2I<(outs rGPR:$dst), (ins rGPR:$src1, rGPR:$src2, shift_imm:$sh),
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2$sh",
+ [(set rGPR:$dst, (or (and rGPR:$src1, 0xFFFF0000),
+ (and (sra rGPR:$src2, asr_amt:$sh),
+ 0xFFFF)))]>,
Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2122,18 +2132,17 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here, it is PKHBT instead.
-def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (t2PKHTB GPR:$src1, GPR:$src2, 16)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000), (srl rGPR:$src2, imm16_31:$sh)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm16_31:$sh))>,
Requires<[HasT2ExtractPack]>;
-def : T2Pat<(or (and GPR:$src1, 0xFFFF0000),
- (and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>,
+def : T2Pat<(or (and rGPR:$src1, 0xFFFF0000),
+ (and (srl rGPR:$src2, imm1_15:$sh), 0xFFFF)),
+ (t2PKHTB rGPR:$src1, rGPR:$src2, (asr_shift_imm imm1_15:$sh))>,
Requires<[HasT2ExtractPack]>;
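// Illustrative: for shifts of 1-15, (and (sra x, sh), 0xFFFF) keeps only bits
// the sign fill never reaches, so the DAG combiner canonicalizes the sra to
// srl and the imm1_15 pattern above matches the srl form.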
//===----------------------------------------------------------------------===//
// Comparison Instructions...
//
-
defm t2CMP : T2I_cmp_irs<0b1101, "cmp",
BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
defm t2CMPz : T2I_cmp_irs<0b1101, "cmp",
@@ -2157,18 +2166,13 @@ defm t2TST : T2I_cmp_irs<0b0000, "tst",
defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
BinOpFrag<(ARMcmpZ (xor node:$LHS, node:$RHS), 0)>>;
-// A8.6.27 CBNZ, CBZ - Compare and branch on (non)zero.
-// Short range conditional branch. Looks awesome for loops. Need to figure
-// out how to use this one.
-
-
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
+def t2MOVCCr : T2I<(outs rGPR:$dst), (ins rGPR:$false, rGPR:$true), IIC_iCMOVr,
"mov", ".w\t$dst, $true",
- [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
+ [/*(set rGPR:$dst, (ARMcmov rGPR:$false, rGPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst"> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2179,9 +2183,9 @@ def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
let Inst{7-4} = 0b0000;
}
-def t2MOVCCi : T2I<(outs GPR:$dst), (ins GPR:$false, t2_so_imm:$true),
+def t2MOVCCi : T2I<(outs rGPR:$dst), (ins rGPR:$false, t2_so_imm:$true),
IIC_iCMOVi, "mov", ".w\t$dst, $true",
-[/*(set GPR:$dst, (ARMcmov GPR:$false, t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
+[/*(set rGPR:$dst,(ARMcmov rGPR:$false,t2_so_imm:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst"> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
@@ -2201,20 +2205,20 @@ class T2I_movcc_sh<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{19-16} = 0b1111; // Rn
let Inst{5-4} = opcod; // Shift type.
}
-def t2MOVCClsl : T2I_movcc_sh<0b00, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCClsl : T2I_movcc_sh<0b00, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "lsl", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCClsr : T2I_movcc_sh<0b01, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCClsr : T2I_movcc_sh<0b01, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "lsr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCCasr : T2I_movcc_sh<0b10, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCCasr : T2I_movcc_sh<0b10, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "asr", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
-def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
- (ins GPR:$false, GPR:$true, i32imm:$rhs),
+def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$dst),
+ (ins rGPR:$false, rGPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
} // neverHasSideEffects
@@ -2225,21 +2229,15 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
// memory barriers protect the atomic sequences
let hasSideEffects = 1 in {
-def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dmb", "",
- [(ARMMemBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DMBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dmb", "",
+ [(ARMMemBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F5;
// FIXME: add support for options other than a full system DMB
let Inst{3-0} = 0b1111;
}
-def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
- ThumbFrm, NoItinerary,
- "dsb", "",
- [(ARMSyncBarrierV7)]>,
- Requires<[IsThumb2]> {
+def t2DSBsy : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "dsb", "",
+ [(ARMSyncBarrier)]>, Requires<[IsThumb, HasDB]> {
let Inst{31-4} = 0xF3BF8F4;
// FIXME: add support for options other than a full system DSB
let Inst{3-0} = 0b1111;
@@ -2329,13 +2327,13 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary, "ldrexb", "\t$dest, [$ptr]",
"", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary, "ldrexh", "\t$dest, [$ptr]",
"", []>;
-def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+def t2LDREX : Thumb2I<(outs rGPR:$dest), (ins rGPR:$ptr), AddrModeNone,
Size4Bytes, NoItinerary,
"ldrex", "\t$dest, [$ptr]", "",
[]> {
@@ -2344,20 +2342,20 @@ def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
let Inst{11-8} = 0b1111;
let Inst{7-0} = 0b00000000; // imm8 = 0
}
-def t2LDREXD : T2I_ldrex<0b11, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$dest, rGPR:$dest2), (ins rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"ldrexd", "\t$dest, $dest2, [$ptr]", "",
[], {?, ?, ?, ?}>;
}
let mayStore = 1, Constraints = "@earlyclobber $success" in {
-def t2STREXB : T2I_strex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexb", "\t$success, $src, [$ptr]", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexh", "\t$success, $src, [$ptr]", "", []>;
-def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+def t2STREX : Thumb2I<(outs rGPR:$success), (ins rGPR:$src, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strex", "\t$success, $src, [$ptr]", "",
[]> {
@@ -2365,8 +2363,8 @@ def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
let Inst{26-20} = 0b0000100;
let Inst{7-0} = 0b00000000; // imm8 = 0
}
-def t2STREXD : T2I_strex<0b11, (outs GPR:$success),
- (ins GPR:$src, GPR:$src2, GPR:$ptr),
+def t2STREXD : T2I_strex<0b11, (outs rGPR:$success),
+ (ins rGPR:$src, rGPR:$src2, rGPR:$ptr),
AddrModeNone, Size4Bytes, NoItinerary,
"strexd", "\t$success, $src, $src2, [$ptr]", "", [],
{?, ?, ?, ?}>;
@@ -2416,7 +2414,7 @@ let Defs =
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
D31 ], hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
+ def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
"mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
"adds\t$val, #7\n\t"
@@ -2425,14 +2423,14 @@ let Defs =
"b\t1f\n\t"
"movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
hasSideEffects = 1, isBarrier = 1 in {
- def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
+ def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
"mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
"adds\t$val, #7\n\t"
@@ -2441,7 +2439,7 @@ let Defs =
"b\t1f\n\t"
"movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
- [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
+ [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
}
@@ -2482,7 +2480,7 @@ let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT :
T2JTI<(outs),
(ins GPR:$target, GPR:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "mov\tpc, $target\n$jt",
+ IIC_Br, "mov\tpc, $target$jt",
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0100100;
@@ -2496,7 +2494,7 @@ def t2BR_JT :
def t2TBB :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbb\t$index\n$jt", []> {
+ IIC_Br, "tbb\t$index$jt", []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
@@ -2507,7 +2505,7 @@ def t2TBB :
def t2TBH :
T2JTI<(outs),
(ins tb_addrmode:$index, jt2block_operand:$jt, i32imm:$id),
- IIC_Br, "tbh\t$index\n$jt", []> {
+ IIC_Br, "tbh\t$index$jt", []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0001101;
let Inst{19-16} = 0b1111; // Rn = pc (table follows this instruction)
@@ -2560,7 +2558,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
-def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+def t2BXJ : T2I<(outs), (ins rGPR:$func), NoItinerary, "bxj", "\t$func",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2647,25 +2645,25 @@ def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
}
// Return From Exception is a system instruction -- for disassembly only
-def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!",
+def t2RFEDBW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base!",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000011; // W = 1
}
-def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeab", "\t$base",
+def t2RFEDB : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfedb", "\t$base",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000001; // W = 0
}
-def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!",
+def t2RFEIAW : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base!",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0011011; // W = 1
}
-def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
+def t2RFEIA : T2I<(outs), (ins rGPR:$base), NoItinerary, "rfeia", "\t$base",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0011001; // W = 0
@@ -2676,26 +2674,26 @@ def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
//
// Two-piece so_imms.
-def : T2Pat<(or GPR:$LHS, t2_so_imm2part:$RHS),
- (t2ORRri (t2ORRri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(or rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2ORRri (t2ORRri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS),
- (t2EORri (t2EORri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(xor rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2EORri (t2EORri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS),
- (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
+def : T2Pat<(add rGPR:$LHS, t2_so_imm2part:$RHS),
+ (t2ADDri (t2ADDri rGPR:$LHS, (t2_so_imm2part_1 imm:$RHS)),
(t2_so_imm2part_2 imm:$RHS))>;
-def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS),
- (t2SUBri (t2SUBri GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
+def : T2Pat<(add rGPR:$LHS, t2_so_neg_imm2part:$RHS),
+ (t2SUBri (t2SUBri rGPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)),
(t2_so_neg_imm2part_2 imm:$RHS))>;
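// Illustrative (hypothetical constant): 0xAB00CD is not a single valid
// t2_so_imm, but it splits into the encodable pieces 0xAB0000 and 0xCD, so
// the add is emitted as two t2ADDri instructions.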
// 32-bit immediate using movw + movt.
// This is a single pseudo instruction to make it re-materializable. Remove
// when we can do generalized remat.
let isReMaterializable = 1 in
-def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
+def t2MOVi32imm : T2Ix2<(outs rGPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
- [(set GPR:$dst, (i32 imm:$src))]>;
+ [(set rGPR:$dst, (i32 imm:$src))]>;
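// Illustrative (hypothetical value): loading 0x12345678 expands to
//   movw r0, #0x5678
//   movt r0, #0x1234
// and keeping the pair a single pseudo lets remat recompute the constant
// instead of spilling it.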
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>,
@@ -2723,7 +2721,7 @@ def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
//
// Rd = Instr{11-8}
-def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
+def t2MRS : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2734,7 +2732,7 @@ def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
}
// Rd = Instr{11-8}
-def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
+def t2MRSsys : T2I<(outs rGPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
let Inst{26} = 0;
@@ -2745,7 +2743,7 @@ def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
}
// Rn = Inst{19-16}
-def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+def t2MSR : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
"\tcpsr$mask, $src",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
@@ -2757,7 +2755,7 @@ def t2MSR : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
}
// Rn = Inst{19-16}
-def t2MSRsys : T2I<(outs), (ins GPR:$src, msr_mask:$mask), NoItinerary, "msr",
+def t2MSRsys : T2I<(outs), (ins rGPR:$src, msr_mask:$mask), NoItinerary, "msr",
"\tspsr$mask, $src",
[/* For disassembly only; pattern left blank */]> {
let Inst{31-27} = 0b11110;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 84c23e1a784cc..c29e09606bd48 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -77,61 +77,61 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
//
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
-def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+def VLDMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
+def VLDMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
+ "vldm${addr:submode}${p}\t$addr, $dsts", "", []> {
let Inst{20} = 1;
}
-def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
-def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VLDMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, $dsts",
- "$addr.base = $wb", []> {
+ "vldm${addr:submode}${p}\t$addr!, $dsts",
+ "$addr.addr = $wb", []> {
let Inst{20} = 1;
}
} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
-def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+def VSTMD : AXDI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
+def VSTMS : AXSI4<(outs), (ins addrmode4:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
+ "vstm${addr:submode}${p}\t$addr, $srcs", "", []> {
let Inst{20} = 0;
}
-def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMD_UPD : AXDI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
-def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
+def VSTMS_UPD : AXSI4<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, $srcs",
- "$addr.base = $wb", []> {
+ "vstm${addr:submode}${p}\t$addr!, $srcs",
+ "$addr.addr = $wb", []> {
let Inst{20} = 0;
}
} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
@@ -420,34 +420,35 @@ def VTOUIZS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
// And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR.
// For disassembly only.
-
+let Uses = [FPSCR] in {
def VTOSIRD : AVConv1I<0b11101, 0b11, 0b1101, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
IIC_fpCVTDI, "vcvtr", ".s32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtr (f64 DPR:$a)))]> {
let Inst{7} = 0; // Z bit
}
def VTOSIRS : AVConv1In<0b11101, 0b11, 0b1101, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
IIC_fpCVTSI, "vcvtr", ".s32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtr SPR:$a))]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRD : AVConv1I<0b11101, 0b11, 0b1100, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
IIC_fpCVTDI, "vcvtr", ".u32.f64\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtru (f64 DPR:$a)))]> {
let Inst{7} = 0; // Z bit
}
def VTOUIRS : AVConv1In<0b11101, 0b11, 0b1100, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
IIC_fpCVTSI, "vcvtr", ".u32.f32\t$dst, $a",
- [/* For disassembly only; pattern left blank */]> {
+ [(set SPR:$dst, (int_arm_vcvtru SPR:$a))]> {
let Inst{7} = 0; // Z bit
}
+}
// Convert between floating-point and fixed-point
// Data type for fixed-point naming convention:
@@ -460,6 +461,7 @@ let Constraints = "$a = $dst" in {
// FP to Fixed-Point:
+let isCodeGenOnly = 1 in {
def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits",
@@ -499,9 +501,11 @@ def VTOULD : AVConv1XI<0b11101, 0b11, 0b1111, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+}
// Fixed-Point to FP:
+let isCodeGenOnly = 1 in {
def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0,
(outs SPR:$dst), (ins SPR:$a, i32imm:$fbits),
IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits",
@@ -541,6 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
(outs DPR:$dst), (ins DPR:$a, i32imm:$fbits),
IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a, $fbits",
[/* For disassembly only; pattern left blank */]>;
+}
} // End of 'let Constraints = "$a = $dst" in'
@@ -654,32 +659,27 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
}
// FPSCR <-> GPR (for disassembly only)
-
-let neverHasSideEffects = 1 in {
-let Uses = [FPSCR] in {
-def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
- "\t$dst, fpscr",
- [/* For disassembly only; pattern left blank */]> {
+let hasSideEffects = 1, Uses = [FPSCR] in
+def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT,
+ "vmrs", "\t$dst, fpscr",
+ [(set GPR:$dst, (int_arm_get_fpscr))]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
-}
-let Defs = [FPSCR] in {
-def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
- "\tfpscr, $src",
- [/* For disassembly only; pattern left blank */]> {
+let Defs = [FPSCR] in
+def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT,
+ "vmsr", "\tfpscr, $src",
+ [(int_arm_set_fpscr GPR:$src)]> {
let Inst{27-20} = 0b11101110;
let Inst{19-16} = 0b0001;
let Inst{11-8} = 0b1010;
let Inst{7} = 0;
let Inst{4} = 1;
}
-}
-} // neverHasSideEffects
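// Sketch of intended use (assuming a frontend lowers an FPSCR builtin to
// these intrinsics); for example, forcing round-toward-zero in IR:
//   %s = call i32 @llvm.arm.get.fpscr()
//   %m = or i32 %s, 12582912        ; set RMode bits [23:22] to 0b11 (RZ)
//   call void @llvm.arm.set.fpscr(i32 %m)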
// Materialize FP immediates. VFP3 only.
let isReMaterializable = 1 in {
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index f80e316d23e85..2b7645a42119e 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -57,7 +57,7 @@ STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
namespace {
struct ARMLoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARMLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -193,20 +193,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
- if (isAM4 && Offset == 4) {
- if (isThumb2)
- // Thumb2 does not support ldmib / stmib.
- return false;
+ // VFP and Thumb2 do not support IB or DA modes.
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
+ bool haveIBAndDA = isNotVFP && !isThumb2;
+ if (Offset == 4 && haveIBAndDA)
Mode = ARM_AM::ib;
- } else if (isAM4 && Offset == -4 * (int)NumRegs + 4) {
- if (isThumb2)
- // Thumb2 does not support ldmda / stmda.
- return false;
+ else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
Mode = ARM_AM::da;
- } else if (isAM4 && Offset == -4 * (int)NumRegs) {
+ else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ // VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- } else if (Offset != 0) {
+ else if (Offset != 0) {
// If starting offset isn't zero, insert a MI to materialize a new base.
// But only do so if it is cost effective, i.e. merging more than two
// loads / stores.
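  // Worked example (illustrative): merging three 4-byte loads whose
  // base-relative offsets are {4, 8, 12} selects ib; {-8, -4, 0} selects da;
  // and {-12, -8, -4} selects db, since -4 * NumRegs == -12 when NumRegs == 3.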
@@ -246,18 +243,12 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
BaseKill = true; // New base is always killed right after its use.
}
- bool isDPR = (Opcode == ARM::VLDRD || Opcode == ARM::VSTRD);
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
Opcode = getLoadStoreMultipleOpcode(Opcode);
- MachineInstrBuilder MIB = (isAM4)
- ? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg)
- : BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs))
- .addImm(Pred).addReg(PredReg);
+ MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg);
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
@@ -333,6 +324,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
if (KilledRegs.count(Reg)) {
unsigned j = Killer[Reg];
memOps[j].MBBI->getOperand(0).setIsKill(false);
+ memOps[j].isKill = false;
}
}
MBB.erase(memOps[i].MBBI);
@@ -348,7 +340,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
- bool isAM4 = isi32Load(Opcode) || isi32Store(Opcode);
+ bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
int Offset = MemOps[SIndex].Offset;
int SOffset = Offset;
unsigned insertAfter = SIndex;
@@ -366,12 +358,12 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned Reg = MO.getReg();
unsigned RegNum = MO.isUndef() ? UINT_MAX
: ARMRegisterInfo::getRegisterNumbering(Reg);
- // AM4 - register numbers in ascending order.
- // AM5 - consecutive register numbers in ascending order.
- // Can only do up to 16 double-word registers per insn.
+ // Register numbers must be in ascending order. For VFP, the registers
+ // must also be consecutive and there is a limit of 16 double-word
+ // registers per instruction.
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
- ((isAM4 && RegNum > PRegNum)
+ ((isNotVFP && RegNum > PRegNum)
|| ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
Offset += Size;
PRegNum = RegNum;
@@ -409,7 +401,7 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
return false;
// Make sure the offset fits in 8 bits.
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
@@ -433,7 +425,7 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
MI->getOpcode() != ARM::ADDri)
return false;
- if (Bytes <= 0 || (Limit && Bytes >= Limit))
+ if (Bytes == 0 || (Limit && Bytes >= Limit))
// Make sure the offset fits in 8 bits.
return false;
@@ -464,12 +456,12 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
- return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMS:
case ARM::VSTMS:
+ return (MI->getNumOperands() - 4) * 4;
case ARM::VLDMD:
case ARM::VSTMD:
- return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
+ return (MI->getNumOperands() - 4) * 8;
}
}
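The VFP cases can share the LDM/STM computation now because the register count is no longer carried in an AM5 immediate; it has to be recovered from the operand list. Roughly, under the operand layout this pass assumes (base, mode immediate, predicate, predicate register, then the register list; RegBytes is an illustrative name):

    unsigned NumRegs = MI->getNumOperands() - 4; // variadic register list
    unsigned Bytes = NumRegs * RegBytes;         // RegBytes: 4 for SPR, 8 for DPR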
@@ -512,26 +504,17 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM4 = (Opcode == ARM::LDM || Opcode == ARM::t2LDM ||
- Opcode == ARM::STM || Opcode == ARM::t2STM);
bool DoMerge = false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- unsigned Offset = 0;
- if (isAM4) {
- // Can't use an updating ld/st if the base register is also a dest
- // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
- for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
- if (MI->getOperand(i).getReg() == Base)
- return false;
- }
- Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
- } else {
- // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
- Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm());
- Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm());
+ // Can't use an updating ld/st if the base register is also a dest
+ // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
+ for (unsigned i = 3, e = MI->getNumOperands(); i != e; ++i) {
+ if (MI->getOperand(i).getReg() == Base)
+ return false;
}
+ Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
// Try merging with the previous instruction.
MachineBasicBlock::iterator BeginMBBI = MBB.begin();
@@ -539,22 +522,14 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
--PrevMBBI;
- if (isAM4) {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::db;
- } else if (isAM4 && Mode == ARM_AM::ib &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- Mode = ARM_AM::da;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
- Mode = ARM_AM::db;
- DoMerge = true;
- }
+ if (Mode == ARM_AM::ia &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::db;
+ DoMerge = true;
+ } else if (Mode == ARM_AM::ib &&
+ isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ Mode = ARM_AM::da;
+ DoMerge = true;
}
if (DoMerge)
MBB.erase(PrevMBBI);
@@ -566,19 +541,12 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
++NextMBBI;
- if (isAM4) {
- if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
- isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
- } else {
- if (Mode == ARM_AM::ia &&
- isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
- DoMerge = true;
- }
+ if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
+ isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
+ } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
+ isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
+ DoMerge = true;
}
if (DoMerge) {
if (NextMBBI == I) {
@@ -595,16 +563,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode);
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
- .addReg(Base, getKillRegState(BaseKill));
- if (isAM4) {
- // [t2]LDM_UPD, [t2]STM_UPD
- MIB.addImm(ARM_AM::getAM4ModeImm(Mode))
- .addImm(Pred).addReg(PredReg);
- } else {
- // VLDM[SD}_UPD, VSTM[SD]_UPD
- MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset))
- .addImm(Pred).addReg(PredReg);
- }
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(Mode))
+ .addImm(Pred).addReg(PredReg);
// Transfer the rest of operands.
for (unsigned OpNum = 4, e = MI->getNumOperands(); OpNum != e; ++OpNum)
MIB.addOperand(MI->getOperand(OpNum));
@@ -736,11 +697,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
- Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia,
- (isDPR ? 2 : 1));
+ Offset = ARM_AM::getAM4ModeImm(AddSub == ARM_AM::sub ?
+ ARM_AM::db : ARM_AM::ia);
else if (isAM2)
Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
else
@@ -748,6 +708,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (isAM5) {
// VLDM[SD]_UPD, VSTM[SD]_UPD
+ // (There are no base-updating versions of VLDR/VSTR instructions, but the
+ // updating load/store-multiple instructions can be used with only one
+ // register.)
MachineOperand &MO = MI->getOperand(0);
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getDefRegState(true)) // WB base register
@@ -1268,7 +1231,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
namespace {
struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
static char ID;
- ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
const TargetData *TD;
const TargetInstrInfo *TII;
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index ab2b06b60783b..ab2b06b60783b 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/ARMMCInstLower.h
index b81a30690ce24..b81a30690ce24 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
+++ b/lib/Target/ARM/ARMMCInstLower.h
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 7e57a1ca55762..514c26b4daf03 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -43,6 +43,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// processFunctionBeforeCalleeSavedScan().
bool HasStackFrame;
+ /// RestoreSPFromFP - True if epilogue should restore SP from FP. Set by
+ /// emitPrologue.
+ bool RestoreSPFromFP;
+
/// LRSpilledForFarJump - True if the LR register has been spilled to
/// enable a far jump.
bool LRSpilledForFarJump;
@@ -95,7 +99,7 @@ public:
ARMFunctionInfo() :
isThumb(false),
hasThumb2(false),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -106,7 +110,7 @@ public:
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()),
- VarArgsRegSaveSize(0), HasStackFrame(false),
+ VarArgsRegSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false),
LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
@@ -125,6 +129,9 @@ public:
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
+ bool shouldRestoreSPFromFP() const { return RestoreSPFromFP; }
+ void setShouldRestoreSPFromFP(bool s) { RestoreSPFromFP = s; }
+
bool isLRSpilledForFarJump() const { return LRSpilledForFarJump; }
void setLRIsSpilledForFarJump(bool s) { LRSpilledForFarJump = s; }
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index d020f3c74bdea..305b232e6a99a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- ARMRegisterInfo.td - ARM Register defs -------------------*- C++ -*-===//
+//===- ARMRegisterInfo.td - ARM Register defs --------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -220,41 +220,11 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- // FP is R11, R9 is available.
- static const unsigned ARM_GPR_AO_1[] = {
+ static const unsigned ARM_GPR_AO[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R12,ARM::LR,
ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10,
- ARM::R11 };
- // FP is R11, R9 is not available.
- static const unsigned ARM_GPR_AO_2[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R10,
- ARM::R11 };
- // FP is R7, R9 is available as non-callee-saved register.
- // This is used by Darwin.
- static const unsigned ARM_GPR_AO_3[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R9, ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R10,ARM::R11,ARM::R7 };
- // FP is R7, R9 is not available.
- static const unsigned ARM_GPR_AO_4[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R10,ARM::R11,
- ARM::R7 };
- // FP is R7, R9 is available as callee-saved register.
- // This is used by non-Darwin platform in Thumb mode.
- static const unsigned ARM_GPR_AO_5[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6,
- ARM::R8, ARM::R9, ARM::R10,ARM::R11,ARM::R7 };
+ ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
// For Thumb1 mode, we don't want to allocate hi regs at all, as we
// don't know how to spill them. If we make our prologue/epilogue code
@@ -270,85 +240,71 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
if (Subtarget.isThumb1Only())
return THUMB_GPR_AO;
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_AO_4;
- else
- return ARM_GPR_AO_3;
- } else {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_AO_2;
- else if (Subtarget.isThumb())
- return ARM_GPR_AO_5;
- else
- return ARM_GPR_AO_1;
- }
+ return ARM_GPR_AO;
}
GPRClass::iterator
GPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- GPRClass::iterator I;
-
- if (Subtarget.isThumb1Only()) {
- I = THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
- }
-
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
- else
- I = ARM_GPR_AO_3 + (sizeof(ARM_GPR_AO_3)/sizeof(unsigned));
- } else {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_AO_2 + (sizeof(ARM_GPR_AO_2)/sizeof(unsigned));
- else if (Subtarget.isThumb())
- I = ARM_GPR_AO_5 + (sizeof(ARM_GPR_AO_5)/sizeof(unsigned));
- else
- I = ARM_GPR_AO_1 + (sizeof(ARM_GPR_AO_1)/sizeof(unsigned));
- }
-
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
+ return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned));
}
}];
}
-// Thumb registers are R0-R7 normally. Some instructions can still use
-// the general GPR register class above (MOV, e.g.)
-def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+// Restricted GPR register class. Many Thumb2 instructions allow the full
+// register range for operands, but have undefined behaviour when PC
+// or SP (R15 or R13) is used. The ARM ARM refers to these operands
+// via the BadReg() pseudo-code description.
+def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
+ R7, R8, R9, R10, R11, R12, LR]> {
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
}];
let MethodBodies = [{
- static const unsigned THUMB_tGPR_AO[] = {
+ static const unsigned ARM_rGPR_AO[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12,ARM::LR,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+ ARM::R8, ARM::R9, ARM::R10,
+ ARM::R11 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_rGPR_AO[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3,
ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
- // FP is R7, only low registers available.
- tGPRClass::iterator
- tGPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return THUMB_tGPR_AO;
+ rGPRClass::iterator
+ rGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO;
+ return ARM_rGPR_AO;
}
- tGPRClass::iterator
- tGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ rGPRClass::iterator
+ rGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
- const TargetRegisterInfo *RI = TM.getRegisterInfo();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- tGPRClass::iterator I =
- THUMB_tGPR_AO + (sizeof(THUMB_tGPR_AO)/sizeof(unsigned));
- // Mac OS X requires FP not to be clobbered for backtracing purpose.
- return (Subtarget.isTargetDarwin() || RI->hasFP(MF)) ? I-1 : I;
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned));
+ return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned));
}
}];
}
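For reference, the BadReg() restriction that rGPR models amounts to excluding SP and PC from the usual GPR set; a hypothetical predicate (illustration only, not in this patch):

    // rGPR == GPR minus the registers BadReg() rejects.
    static bool isBadReg(unsigned Reg) {
      return Reg == ARM::SP || Reg == ARM::PC; // R13 and R15
    }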
+// Thumb registers are R0-R7 normally. Some instructions can still use
+// the general GPR register class above (e.g., MOV).
+def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {}
+
// For tail calls, we can't use callee-saved registers, as they are restored
// to the saved value before the tail call, which would clobber a call address.
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
@@ -381,36 +337,20 @@ def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
if (Subtarget.isThumb1Only())
return THUMB_GPR_AO_TC;
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- return ARM_GPR_NOR9_TC;
- else
- return ARM_GPR_R9_TC;
- } else
- // R9 is either callee-saved or reserved; can't use it.
- return ARM_GPR_NOR9_TC;
+ return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC;
}
tcGPRClass::iterator
tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- GPRClass::iterator I;
-
- if (Subtarget.isThumb1Only()) {
- I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
- return I;
- }
-
- if (Subtarget.isTargetDarwin()) {
- if (Subtarget.isR9Reserved())
- I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- else
- I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned));
- } else
- // R9 is either callee-saved or reserved; can't use it.
- I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- return I;
+
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
+
+ return Subtarget.isTargetDarwin() ?
+ ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) :
+ ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
}
}];
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 10fd257055fb5..cb539f4c01ec8 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -33,14 +33,19 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, ARMFPUType(None)
, UseNEONForSinglePrecisionFP(false)
, SlowVMLx(false)
+ , SlowFPBrcc(false)
, IsThumb(isT)
, ThumbMode(Thumb1)
+ , NoARM(false)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
, UseMovt(UseMOVT)
, HasFP16(false)
, HasHardwareDivide(false)
, HasT2ExtractPack(false)
+ , HasDataBarrier(false)
+ , Pref32BitThumb(false)
+ , FPOnlySP(false)
, stackAlignment(4)
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e7d92ede9b984..67e58038ee779 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,7 +26,7 @@ class GlobalValue;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
- V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M
+ V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
};
enum ARMFPEnum {
@@ -63,6 +63,9 @@ protected:
/// ThumbMode - Indicates supported Thumb version.
ThumbTypeEnum ThumbMode;
+ /// NoARM - True if subtarget does not support ARM mode execution.
+ bool NoARM;
+
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
@@ -84,6 +87,18 @@ protected:
/// instructions.
bool HasT2ExtractPack;
+ /// HasDataBarrier - True if the subtarget supports DMB / DSB data barrier
+ /// instructions.
+ bool HasDataBarrier;
+
+ /// Pref32BitThumb - If true, codegen would prefer 32-bit Thumb instructions
+ /// over 16-bit ones.
+ bool Pref32BitThumb;
+
+ /// FPOnlySP - If true, the floating point unit only supports single
+ /// precision.
+ bool FPOnlySP;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -128,6 +143,8 @@ protected:
bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+ bool hasARMOps() const { return !NoARM; }
+
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
bool hasVFP3() const { return ARMFPUType >= VFPv3; }
bool hasNEON() const { return ARMFPUType >= NEON; }
@@ -135,8 +152,11 @@ protected:
return hasNEON() && UseNEONForSinglePrecisionFP; }
bool hasDivide() const { return HasHardwareDivide; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
+ bool hasDataBarrier() const { return HasDataBarrier; }
bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
bool isFPBrccSlow() const { return SlowFPBrcc; }
+ bool isFPOnlySP() const { return FPOnlySP; }
+ bool prefers32BitThumb() const { return Pref32BitThumb; }
bool hasFP16() const { return HasFP16; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 09203f9304df0..30ff8276cdaac 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -31,7 +31,6 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
-
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
@@ -66,6 +65,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
"v128:64:128-v64:64:64-n32")),
TLInfo(*this),
TSInfo(*this) {
+ if (!Subtarget.hasARMOps())
+ report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
+ "support ARM mode execution!");
}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
@@ -85,9 +87,15 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
TSInfo(*this) {
}
+// Pass Pipeline Configuration
+bool ARMBaseTargetMachine::addPreISel(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None)
+ PM.add(createARMGlobalMergePass(getTargetLowering()));
+ return false;
+}
-// Pass Pipeline Configuration
bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
PM.add(createARMISelDag(*this, OptLevel));
@@ -132,7 +140,7 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (Subtarget.isThumb2())
+ if (Subtarget.isThumb2() && !Subtarget.prefers32BitThumb())
PM.add(createThumb2SizeReductionPass());
PM.add(createARMConstantIslandPass());
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index a222e57b13ff3..17e5425a9d370 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -50,6 +50,7 @@ public:
}
// Pass Pipeline Configuration
+ virtual bool addPreISel(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreSched2(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4b083244b2413..75e2a739bf1f8 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMSubtarget.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -18,8 +19,10 @@
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
@@ -37,6 +40,7 @@ enum ShiftType {
class ARMAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
+ TargetMachine &TM;
private:
MCAsmParser &getParser() const { return Parser; }
@@ -76,26 +80,33 @@ private:
bool ParseDirectiveSyntax(SMLoc L);
- // TODO - For now hacked versions of the next two are in here in this file to
- // allow some parser testing until the table gen versions are implemented.
+ bool MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst) {
+ if (!MatchInstructionImpl(Operands, Inst))
+ return false;
+
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unrecognized instruction");
+
+ return true;
+ }
/// @name Auto-generated Match Functions
/// {
- bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst);
- /// MatchRegisterName - Match the given string to a register name and return
- /// its register number, or -1 if there is no match. To allow return values
- /// to be used directly in register lists, arm registers have values between
- /// 0 and 15.
- int MatchRegisterName(StringRef Name);
+ unsigned ComputeAvailableFeatures(const ARMSubtarget *Subtarget) const;
+
+ bool MatchInstructionImpl(const SmallVectorImpl<MCParsedAsmOperand*>
+ &Operands,
+ MCInst &Inst);
/// }
public:
- ARMAsmParser(const Target &T, MCAsmParser &_Parser)
- : TargetAsmParser(T), Parser(_Parser) {}
+ ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -110,16 +121,21 @@ private:
ARMOperand() {}
public:
enum KindTy {
- Token,
- Register,
+ CondCode,
Immediate,
- Memory
+ Memory,
+ Register,
+ Token
} Kind;
SMLoc StartLoc, EndLoc;
union {
struct {
+ ARMCC::CondCodes Val;
+ } CC;
+
+ struct {
const char *Data;
unsigned Length;
} Tok;
@@ -151,16 +167,19 @@ public:
};
- ARMOperand(KindTy K, SMLoc S, SMLoc E)
- : Kind(K), StartLoc(S), EndLoc(E) {}
+ //ARMOperand(KindTy K, SMLoc S, SMLoc E)
+ // : Kind(K), StartLoc(S), EndLoc(E) {}
ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
+ case CondCode:
+ CC = o.CC;
+ break;
case Token:
- Tok = o.Tok;
+ Tok = o.Tok;
break;
case Register:
Reg = o.Reg;
@@ -179,6 +198,11 @@ public:
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+ ARMCC::CondCodes getCondCode() const {
+ assert(Kind == CondCode && "Invalid access!");
+ return CC.Val;
+ }
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -194,15 +218,50 @@ public:
return Imm.Val;
}
- bool isToken() const {return Kind == Token; }
+ bool isCondCode() const { return Kind == CondCode; }
+
+ bool isImm() const { return Kind == Immediate; }
bool isReg() const { return Kind == Register; }
+ bool isToken() const {return Kind == Token; }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
+
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(unsigned(getCondCode())));
+ // FIXME: What belongs here?
+ Inst.addOperand(MCOperand::CreateReg(0));
+ }
+
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ virtual void dump(raw_ostream &OS) const;
+
+ static void CreateCondCode(OwningPtr<ARMOperand> &Op, ARMCC::CondCodes CC,
+ SMLoc S) {
+ Op.reset(new ARMOperand);
+ Op->Kind = CondCode;
+ Op->CC.Val = CC;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ }
+
static void CreateToken(OwningPtr<ARMOperand> &Op, StringRef Str,
SMLoc S) {
Op.reset(new ARMOperand);
@@ -262,6 +321,33 @@ public:
} // end anonymous namespace.
+void ARMOperand::dump(raw_ostream &OS) const {
+ switch (Kind) {
+ case CondCode:
+ OS << ARMCondCodeToString(getCondCode());
+ break;
+ case Immediate:
+ getImm()->print(OS);
+ break;
+ case Memory:
+ OS << "<memory>";
+ break;
+ case Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ }
+}
+
+/// @name Auto-generated Match Functions
+/// {
+
+static unsigned MatchRegisterName(StringRef Name);
+
+/// }
+
/// Try to parse a register name. The token must be an Identifier when called,
/// and if it is a register name a Reg operand is created, the token is eaten
/// and false is returned. Else true is returned and no token is eaten.
@@ -548,77 +634,6 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
return false;
}
-/// A hack to allow some testing, to be replaced by a real table gen version.
-int ARMAsmParser::MatchRegisterName(StringRef Name) {
- if (Name == "r0" || Name == "R0")
- return 0;
- else if (Name == "r1" || Name == "R1")
- return 1;
- else if (Name == "r2" || Name == "R2")
- return 2;
- else if (Name == "r3" || Name == "R3")
- return 3;
- else if (Name == "r3" || Name == "R3")
- return 3;
- else if (Name == "r4" || Name == "R4")
- return 4;
- else if (Name == "r5" || Name == "R5")
- return 5;
- else if (Name == "r6" || Name == "R6")
- return 6;
- else if (Name == "r7" || Name == "R7")
- return 7;
- else if (Name == "r8" || Name == "R8")
- return 8;
- else if (Name == "r9" || Name == "R9")
- return 9;
- else if (Name == "r10" || Name == "R10")
- return 10;
- else if (Name == "r11" || Name == "R11" || Name == "fp")
- return 11;
- else if (Name == "r12" || Name == "R12" || Name == "ip")
- return 12;
- else if (Name == "r13" || Name == "R13" || Name == "sp")
- return 13;
- else if (Name == "r14" || Name == "R14" || Name == "lr")
- return 14;
- else if (Name == "r15" || Name == "R15" || Name == "pc")
- return 15;
- return -1;
-}
-
-/// A hack to allow some testing, to be replaced by a real table gen version.
-bool ARMAsmParser::
-MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst) {
- ARMOperand &Op0 = *(ARMOperand*)Operands[0];
- assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
- StringRef Mnemonic = Op0.getToken();
- if (Mnemonic == "add" ||
- Mnemonic == "stmfd" ||
- Mnemonic == "str" ||
- Mnemonic == "ldmfd" ||
- Mnemonic == "ldr" ||
- Mnemonic == "mov" ||
- Mnemonic == "sub" ||
- Mnemonic == "bl" ||
- Mnemonic == "push" ||
- Mnemonic == "blx" ||
- Mnemonic == "pop") {
- // Hard-coded to a valid instruction, till we have a real matcher.
- Inst = MCInst();
- Inst.setOpcode(ARM::MOVr);
- Inst.addOperand(MCOperand::CreateReg(2));
- Inst.addOperand(MCOperand::CreateReg(2));
- Inst.addOperand(MCOperand::CreateImm(0));
- Inst.addOperand(MCOperand::CreateImm(0));
- Inst.addOperand(MCOperand::CreateReg(0));
- return false;
- }
-
- return true;
-}
-
/// Parse an ARM instruction operand. For now this parses the operand regardless
/// of the mnemonic.
bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
@@ -661,12 +676,56 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
OwningPtr<ARMOperand> Op;
- ARMOperand::CreateToken(Op, Name, NameLoc);
-
+
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+
+ // Determine the predicate, if any.
+ //
+ // FIXME: We need a way to check whether a prefix supports predication,
+ // otherwise we will end up with an ambiguity for instructions that happen to
+ // end with a predicate name.
+ unsigned CC = StringSwitch<unsigned>(Head.substr(Head.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Head = Head.slice(0, Head.size() - 2);
+ } else
+ CC = ARMCC::AL;
+
+ ARMOperand::CreateToken(Op, Head, NameLoc);
Operands.push_back(Op.take());
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ ARMOperand::CreateCondCode(Op, ARMCC::CondCodes(CC), NameLoc);
+ Operands.push_back(Op.take());
+
+ // Add the remaining tokens in the mnemonic.
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ Head = Name.slice(Start, Next);
+ ARMOperand::CreateToken(Op, Head, NameLoc);
+ Operands.push_back(Op.take());
+ }
+
+ // Read the remaining operands.
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
OwningPtr<ARMOperand> Op;
if (ParseOperand(Op)) return true;
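The ambiguity flagged in the FIXME above is concrete: any mnemonic whose last two letters spell a condition code gets mis-split. A hypothetical trace (not produced by this patch):

    // "teq" ends in "eq", so the StringSwitch strips the suffix:
    //   Head = "t", CC = ARMCC::EQ   // wrong: "teq" carries no predicate here
    // A table of predicable mnemonics would be needed to disambiguate.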
@@ -809,3 +868,5 @@ extern "C" void LLVMInitializeARMAsmParser() {
RegisterAsmParser<ARMAsmParser> Y(TheThumbTarget);
LLVMInitializeARMAsmLexer();
}
+
+#include "ARMGenAsmMatcher.inc"
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index edc934549b288..8026e7718ca98 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -158,7 +158,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) {
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) {
O << '\t' << "vpush";
printPredicateOperand(MI, 3, O);
O << '\t';
@@ -171,7 +171,7 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) &&
MI->getOperand(0).getReg() == ARM::SP) {
const MCOperand &MO1 = MI->getOperand(2);
- if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) {
+ if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) {
O << '\t' << "vpop";
printPredicateOperand(MI, 3, O);
O << '\t';
@@ -278,15 +278,13 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
O << getRegisterName(MO1.getReg());
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm()))
- << ' ';
-
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
if (MO2.getReg()) {
- O << getRegisterName(MO2.getReg());
+ O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
- } else {
- O << "#" << ARM_AM::getSORegOffset(MO3.getImm());
+ } else if (ShOpc != ARM_AM::rrx) {
+ O << " #" << ARM_AM::getSORegOffset(MO3.getImm());
}
}
@@ -414,16 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
return;
}
- if (Modifier && strcmp(Modifier, "submode") == 0) {
- ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- O << ARM_AM::getAMSubModeStr(Mode);
- return;
- } else if (Modifier && strcmp(Modifier, "base") == 0) {
- // Used for FSTM{D|S} and LSTM{D|S} operations.
- O << getRegisterName(MO1.getReg());
- return;
- }
-
O << "[" << getRegisterName(MO1.getReg());
if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) {
@@ -463,9 +451,9 @@ void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum,
assert(0 && "FIXME: Implement printAddrModePCOperand");
}
-void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
+void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
int32_t lsb = CountTrailingZeros_32(v);
@@ -474,6 +462,31 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI,
O << '#' << lsb << ", #" << width;
}
+void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned val = MI->getOperand(OpNum).getImm();
+ O << ARM_MB::MemBOptToString(val);
+}
+
+void ARMInstPrinter::printShiftImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned ShiftOp = MI->getOperand(OpNum).getImm();
+ ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+ switch (Opc) {
+ case ARM_AM::no_shift:
+ return;
+ case ARM_AM::lsl:
+ O << ", lsl #";
+ break;
+ case ARM_AM::asr:
+ O << ", asr #";
+ break;
+ default:
+ assert(0 && "unexpected shift opcode for shift immediate operand");
+ }
+ O << ARM_AM::getSORegOffset(ShiftOp);
+}
+
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
O << "{";
@@ -669,12 +682,11 @@ void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum,
O << getRegisterName(Reg);
// Print the shift opc.
- O << ", "
- << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
- << " ";
-
assert(MO2.isImm() && "Not a valid t2_so_reg value!");
- O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO2.getImm());
+ O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc != ARM_AM::rrx)
+ O << " #" << ARM_AM::getSORegOffset(MO2.getImm());
}
void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI,
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index ddf5047793d29..e5ad0d07e9bab 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -57,6 +57,8 @@ public:
void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O);
+ void printMemBOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printThumbITMask(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
index 4e299f86ecb67..18645c0864a32 100644
--- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
@@ -1,8 +1,6 @@
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMARMAsmPrinter
- ARMAsmPrinter.cpp
ARMInstPrinter.cpp
- ARMMCInstLower.cpp
)
add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 0df34666b959b..6b4dee5965d25 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -7,25 +7,32 @@ tablegen(ARMGenInstrNames.inc -gen-instr-enums)
tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
tablegen(ARMGenCodeEmitter.inc -gen-emitter)
tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
+tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
tablegen(ARMGenDAGISel.inc -gen-dag-isel)
+tablegen(ARMGenFastISel.inc -gen-fast-isel)
tablegen(ARMGenCallingConv.inc -gen-callingconv)
tablegen(ARMGenSubtarget.inc -gen-subtarget)
tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
add_llvm_target(ARMCodeGen
+ ARMAsmPrinter.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
ARMExpandPseudoInsts.cpp
+ ARMFastISel.cpp
+ ARMGlobalMerge.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
ARMInstrInfo.cpp
ARMJITInfo.cpp
ARMLoadStoreOptimizer.cpp
ARMMCAsmInfo.cpp
+ ARMMCInstLower.cpp
ARMRegisterInfo.cpp
+ ARMSelectionDAGInfo.cpp
ARMSubtarget.cpp
ARMTargetMachine.cpp
ARMTargetObjectFile.cpp
@@ -38,7 +45,6 @@ add_llvm_target(ARMCodeGen
Thumb2InstrInfo.cpp
Thumb2RegisterInfo.cpp
Thumb2SizeReduction.cpp
- ARMSelectionDAGInfo.cpp
)
-target_link_libraries (LLVMARMCodeGen LLVMSelectionDAG)
+target_link_libraries (LLVMARMCodeGen LLVMARMAsmPrinter LLVMSelectionDAG)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 4de697e8bf676..e22028985b46c 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -26,6 +26,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+//#define DEBUG(X) do { X; } while (0)
+
/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from
/// ARMDecoderEmitter.cpp TableGen backend. It contains:
///
@@ -87,6 +89,11 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
return ARM::BFI;
}
+ // Ditto for STRBT, which is a super-instruction for A8.6.199 Encoding A1 & A2.
+ // As a result, the decoder fails to deocode USAT properly.
+ if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1)
+ return ARM::USAT;
+
// Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
// As a result, the decoder fails to decode UMULL properly.
if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
@@ -106,7 +113,7 @@ static unsigned decodeARMInstruction(uint32_t &insn) {
// Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
// As a result, the decoder fails to decode SSAT properly.
if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
- return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr;
+ return ARM::SSAT;
// Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
// As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
@@ -291,7 +298,7 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
/// decodeInstruction(insn) is invoked on the original insn.
///
/// Otherwise, decodeThumbInstruction is called with the original insn.
-static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) {
+static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) {
if (IsThumb2) {
uint16_t op1 = slice(insn, 28, 27);
uint16_t op2 = slice(insn, 26, 20);
@@ -429,7 +436,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI,
// passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top
// halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to
// the top half followed by the second halfword.
- uint32_t insn = 0;
+ unsigned insn = 0;
// Possible second halfword.
uint16_t insn1 = 0;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index a07ff2832aa7e..9f493b9aee02a 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -20,6 +20,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+//#define DEBUG(X) do { X; } while (0)
+
/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s
/// describing the operand info for each ARMInsts[i].
@@ -93,6 +95,9 @@ static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister,
RegClassID = ARM::DPRRegClassID;
}
+ // For this purpose, we can treat rGPR as if it were GPR.
+ if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID;
+
// See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm().
unsigned RegNum =
RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister;
@@ -451,12 +456,23 @@ static inline ARM_AM::ShiftOpc getShiftOpcForBits(unsigned bits) {
//
// A8-11: DecodeImmShift()
static inline void getImmShiftSE(ARM_AM::ShiftOpc &ShOp, unsigned &ShImm) {
- // If type == 0b11 and imm5 == 0, we have an rrx, instead.
- if (ShOp == ARM_AM::ror && ShImm == 0)
- ShOp = ARM_AM::rrx;
- // If (lsr or asr) and imm5 == 0, shift amount is 32.
- if ((ShOp == ARM_AM::lsr || ShOp == ARM_AM::asr) && ShImm == 0)
+ if (ShImm != 0)
+ return;
+ switch (ShOp) {
+ case ARM_AM::no_shift:
+ case ARM_AM::rrx:
+ break;
+ case ARM_AM::lsl:
+ ShOp = ARM_AM::no_shift;
+ break;
+ case ARM_AM::lsr:
+ case ARM_AM::asr:
ShImm = 32;
+ break;
+ case ARM_AM::ror:
+ ShOp = ARM_AM::rrx;
+ break;
+ }
}
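The rewrite makes getImmShiftSE cover the full imm5 == 0 column of the A8-11 DecodeImmShift table:

    // lsl #0  ->  no shift
    // lsr #0  ->  lsr #32
    // asr #0  ->  asr #32
    // ror #0  ->  rrx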
// getAMSubModeForBits - getAMSubModeForBits translates from the ARM encoding
@@ -490,9 +506,6 @@ static inline ARM_AM::AMSubMode getAMSubModeForBits(unsigned bits) {
static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- if (Opcode == ARM::Int_MemBarrierV7 || Opcode == ARM::Int_SyncBarrierV7)
- return true;
-
assert(0 && "Unexpected pseudo instruction!");
return false;
}
@@ -887,7 +900,6 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
- assert(0 && "Unexpected BrMiscFrm Opcode");
return false;
}
@@ -906,34 +918,6 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) {
return true;
}
-static inline bool SaturateOpcode(unsigned Opcode) {
- switch (Opcode) {
- case ARM::SSATlsl: case ARM::SSATasr: case ARM::SSAT16:
- case ARM::USATlsl: case ARM::USATasr: case ARM::USAT16:
- return true;
- default:
- return false;
- }
-}
-
-static inline unsigned decodeSaturatePos(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- case ARM::SSATlsl:
- case ARM::SSATasr:
- return slice(insn, 20, 16) + 1;
- case ARM::SSAT16:
- return slice(insn, 19, 16) + 1;
- case ARM::USATlsl:
- case ARM::USATasr:
- return slice(insn, 20, 16);
- case ARM::USAT16:
- return slice(insn, 19, 16);
- default:
- assert(0 && "Invalid opcode passed in");
- return 0;
- }
-}
-
// A major complication is the fact that some of the saturating add/subtract
// operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm.
// They are QADD, QDADD, QDSUB, and QSUB.
@@ -959,40 +943,14 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (OpIdx >= NumOps)
return false;
- // SSAT/SSAT16/USAT/USAT16 has imm operand after Rd.
- if (SaturateOpcode(Opcode)) {
- MI.addOperand(MCOperand::CreateImm(decodeSaturatePos(Opcode, insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRm(insn))));
-
- if (Opcode == ARM::SSAT16 || Opcode == ARM::USAT16) {
- OpIdx += 2;
- return true;
- }
-
- // For SSAT operand reg (Rm) has been disassembled above.
- // Now disassemble the shift amount.
-
- // Inst{11-7} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 11, 7);
-
- // A8.6.183. Possible ASR shift amount of 32...
- if (Opcode == ARM::SSATasr && ShAmt == 0)
- ShAmt = 32;
-
- MI.addOperand(MCOperand::CreateImm(ShAmt));
-
- OpIdx += 3;
- return true;
- }
-
// Special-case handling of BFC/BFI/SBFX/UBFX.
if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
- // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI.
- MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0
- : getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(0));
+ if (Opcode == ARM::BFI) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
+ ++OpIdx;
+ }
uint32_t mask = 0;
if (!getBFCInvMask(insn, mask))
return false;
@@ -1498,13 +1456,55 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 5-bit immediate field Inst{11-7}.
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
- MI.addOperand(MCOperand::CreateImm(ShiftAmt));
+ ARM_AM::ShiftOpc Opc = ARM_AM::no_shift;
+ if (Opcode == ARM::PKHBT)
+ Opc = ARM_AM::lsl;
+ else if (Opcode == ARM::PKHTB)
+ Opc = ARM_AM::asr;
+ getImmShiftSE(Opc, ShiftAmt);
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt)));
++OpIdx;
}
return true;
}
+/// DisassembleSatFrm - Disassemble saturate instructions:
+/// SSAT, SSAT16, USAT, and USAT16.
+static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
+
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRd(insn))));
+
+ unsigned Pos = slice(insn, 20, 16);
+ if (Opcode == ARM::SSAT || Opcode == ARM::SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRm(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 6, 6) != 0 ? ARM_AM::asr : ARM_AM::lsl);
+ // Inst{11-7} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 11, 7);
+ if (ShAmt == 0) {
+ // A8.6.183. Possible ASR shift amount of 32...
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
+ }
+ return true;
+}
+
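The +1 follows from the encoding: SSAT/SSAT16 store the saturate-to position minus one, while USAT/USAT16 store it directly. For example (encodings per the ARM ARM, shown for illustration):

    // ssat r0, #16, r1   encodes sat_imm = 15; decode yields 15 + 1 = 16
    // usat r0, #15, r1   encodes sat_imm = 15; decode yields 15 as-is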
// Extend instructions.
// SXT* and UXT*: Rd [Rn] Rm [rot_imm].
// The 2nd operand register is Rn and the 3rd operand register is Rm for the
@@ -1863,7 +1863,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 && "VFPLdStFrm expects NumOps >= 3");
- bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS) ? true : false;
+ bool isSPVFP = (Opcode == ARM::VLDRS || Opcode == ARM::VSTRS);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
// Extract Dd/Sd for operand 0.
@@ -1886,7 +1886,7 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// VFP Load/Store Multiple Instructions.
// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and
-// operand 1 (the AM5 mode imm) is followed by two predicate operands. It is
+// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is
// followed by a reglist of either DPR(s) or SPR(s).
//
// VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD]
@@ -1910,16 +1910,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(Base));
- // Next comes the AM5 Opcode.
+ // Next comes the AM4 Opcode.
ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn));
// Must be either "ia" or "db" submode.
if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) {
- DEBUG(errs() << "Illegal addressing mode 5 sub-mode!\n");
+ DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n");
return false;
}
-
- unsigned char Imm8 = insn & 0xFF;
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM5Opc(SubMode, Imm8)));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode)));
// Handling the two predicate operands before the reglist.
int64_t CondVal = insn >> ARMII::CondShift;
@@ -1929,13 +1927,14 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx += 4;
bool isSPVFP = (Opcode == ARM::VLDMS || Opcode == ARM::VLDMS_UPD ||
- Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD) ? true : false;
+ Opcode == ARM::VSTMS || Opcode == ARM::VSTMS_UPD);
unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID;
// Extract Dd/Sd.
unsigned RegD = decodeVFPRd(insn, isSPVFP);
// Fill the variadic part of reglist.
+ unsigned char Imm8 = insn & 0xFF;
unsigned Regs = isSPVFP ? Imm8 : Imm8/2;
for (unsigned i = 0; i < Regs; ++i) {
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID,
@@ -2244,9 +2243,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// We have homogeneous NEON registers for Load/Store.
unsigned RegClass = 0;
+ bool DRegPair = UseDRegPair(Opcode);
// Double-spaced registers have increments of 2.
- unsigned Inc = DblSpaced ? 2 : 1;
+ unsigned Inc = (DblSpaced || DRegPair) ? 2 : 1;
unsigned Rn = decodeRn(insn);
unsigned Rm = decodeRm(insn);
@@ -2292,8 +2292,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
RegClass = OpInfo[OpIdx].RegClass;
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd,
- UseDRegPair(Opcode))));
+ getRegisterEnum(B, RegClass, Rd, DRegPair)));
Rd += Inc;
++OpIdx;
}
@@ -2312,8 +2311,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, RegClass, Rd,
- UseDRegPair(Opcode))));
+ getRegisterEnum(B, RegClass, Rd, DRegPair)));
Rd += Inc;
++OpIdx;
}
@@ -2351,6 +2349,11 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
}
+ // Accessing registers past the end of the NEON register file is not
+ // defined.
+ if (Rd > 32)
+ return false;
+
return true;
}
@@ -2423,10 +2426,14 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
break;
case ARM::VMOVv4i16:
case ARM::VMOVv8i16:
+ case ARM::VMVNv4i16:
+ case ARM::VMVNv8i16:
esize = ESize16;
break;
case ARM::VMOVv2i32:
case ARM::VMOVv4i32:
+ case ARM::VMVNv2i32:
+ case ARM::VMVNv4i32:
esize = ESize32;
break;
case ARM::VMOVv1i64:
@@ -2944,7 +2951,7 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// A8.6.49 ISB
static inline bool MemBarrierInstr(uint32_t insn) {
unsigned op7_4 = slice(insn, 7, 4);
- if (slice(insn, 31, 20) == 0xf57 && (op7_4 >= 4 && op7_4 <= 6))
+ if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6))
return true;
return false;
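Checking Inst{31-8} rather than Inst{31-20} pins the whole fixed pattern, so only the architected barrier encodings match. For illustration, the SY variants (encodings per the ARM ARM):

    // dsb sy = 0xF57FF04F, dmb sy = 0xF57FF05F, isb sy = 0xF57FF06F
    // slice(insn, 31, 8) == 0xF57FF0, op7_4 selects {DSB, DMB, ISB},
    // and Inst{3-0} carries the barrier option (SY == 0xF).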
@@ -3001,8 +3008,15 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- if (MemBarrierInstr(insn))
+ if (MemBarrierInstr(insn)) {
+ // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care
+ // of within the generic ARMBasicMCBuilder::BuildIt() method.
+ //
+ // Inst{3-0} encodes the memory barrier option for the variants.
+ MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0)));
+ NumOpsAdded = 1;
return true;
+ }
switch (Opcode) {
case ARM::CLREX:
@@ -3073,6 +3087,7 @@ static const DisassembleFP FuncPtrs[] = {
&DisassembleLdStMulFrm,
&DisassembleLdStExFrm,
&DisassembleArithMiscFrm,
+ &DisassembleSatFrm,
&DisassembleExtFrm,
&DisassembleVFPUnaryFrm,
&DisassembleVFPBinaryFrm,
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index 7d21256a14f9f..9c30d332d1f20 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -23,7 +23,8 @@
#include "llvm/MC/MCInst.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "ARMInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
#include "ARMDisassembler.h"
namespace llvm {
@@ -53,36 +54,35 @@ public:
ENTRY(ARM_FORMAT_LDSTMULFRM, 10) \
ENTRY(ARM_FORMAT_LDSTEXFRM, 11) \
ENTRY(ARM_FORMAT_ARITHMISCFRM, 12) \
- ENTRY(ARM_FORMAT_EXTFRM, 13) \
- ENTRY(ARM_FORMAT_VFPUNARYFRM, 14) \
- ENTRY(ARM_FORMAT_VFPBINARYFRM, 15) \
- ENTRY(ARM_FORMAT_VFPCONV1FRM, 16) \
- ENTRY(ARM_FORMAT_VFPCONV2FRM, 17) \
- ENTRY(ARM_FORMAT_VFPCONV3FRM, 18) \
- ENTRY(ARM_FORMAT_VFPCONV4FRM, 19) \
- ENTRY(ARM_FORMAT_VFPCONV5FRM, 20) \
- ENTRY(ARM_FORMAT_VFPLDSTFRM, 21) \
- ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 22) \
- ENTRY(ARM_FORMAT_VFPMISCFRM, 23) \
- ENTRY(ARM_FORMAT_THUMBFRM, 24) \
- ENTRY(ARM_FORMAT_NEONFRM, 25) \
- ENTRY(ARM_FORMAT_NEONGETLNFRM, 26) \
- ENTRY(ARM_FORMAT_NEONSETLNFRM, 27) \
- ENTRY(ARM_FORMAT_NEONDUPFRM, 28) \
- ENTRY(ARM_FORMAT_MISCFRM, 29) \
- ENTRY(ARM_FORMAT_THUMBMISCFRM, 30) \
- ENTRY(ARM_FORMAT_NLdSt, 31) \
- ENTRY(ARM_FORMAT_N1RegModImm, 32) \
- ENTRY(ARM_FORMAT_N2Reg, 33) \
- ENTRY(ARM_FORMAT_NVCVT, 34) \
- ENTRY(ARM_FORMAT_NVecDupLn, 35) \
- ENTRY(ARM_FORMAT_N2RegVecShL, 36) \
- ENTRY(ARM_FORMAT_N2RegVecShR, 37) \
- ENTRY(ARM_FORMAT_N3Reg, 38) \
- ENTRY(ARM_FORMAT_N3RegVecSh, 39) \
- ENTRY(ARM_FORMAT_NVecExtract, 40) \
- ENTRY(ARM_FORMAT_NVecMulScalar, 41) \
- ENTRY(ARM_FORMAT_NVTBL, 42)
+ ENTRY(ARM_FORMAT_SATFRM, 13) \
+ ENTRY(ARM_FORMAT_EXTFRM, 14) \
+ ENTRY(ARM_FORMAT_VFPUNARYFRM, 15) \
+ ENTRY(ARM_FORMAT_VFPBINARYFRM, 16) \
+ ENTRY(ARM_FORMAT_VFPCONV1FRM, 17) \
+ ENTRY(ARM_FORMAT_VFPCONV2FRM, 18) \
+ ENTRY(ARM_FORMAT_VFPCONV3FRM, 19) \
+ ENTRY(ARM_FORMAT_VFPCONV4FRM, 20) \
+ ENTRY(ARM_FORMAT_VFPCONV5FRM, 21) \
+ ENTRY(ARM_FORMAT_VFPLDSTFRM, 22) \
+ ENTRY(ARM_FORMAT_VFPLDSTMULFRM, 23) \
+ ENTRY(ARM_FORMAT_VFPMISCFRM, 24) \
+ ENTRY(ARM_FORMAT_THUMBFRM, 25) \
+ ENTRY(ARM_FORMAT_MISCFRM, 26) \
+ ENTRY(ARM_FORMAT_NEONGETLNFRM, 27) \
+ ENTRY(ARM_FORMAT_NEONSETLNFRM, 28) \
+ ENTRY(ARM_FORMAT_NEONDUPFRM, 29) \
+ ENTRY(ARM_FORMAT_NLdSt, 30) \
+ ENTRY(ARM_FORMAT_N1RegModImm, 31) \
+ ENTRY(ARM_FORMAT_N2Reg, 32) \
+ ENTRY(ARM_FORMAT_NVCVT, 33) \
+ ENTRY(ARM_FORMAT_NVecDupLn, 34) \
+ ENTRY(ARM_FORMAT_N2RegVecShL, 35) \
+ ENTRY(ARM_FORMAT_N2RegVecShR, 36) \
+ ENTRY(ARM_FORMAT_N3Reg, 37) \
+ ENTRY(ARM_FORMAT_N3RegVecSh, 38) \
+ ENTRY(ARM_FORMAT_NVecExtract, 39) \
+ ENTRY(ARM_FORMAT_NVecMulScalar, 40) \
+ ENTRY(ARM_FORMAT_NVTBL, 41)
// ARM instruction format specifies the encoding used by the instruction.
#define ENTRY(n, v) n = v,
@@ -126,8 +126,8 @@ static inline unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
}
/// Utility function for setting [From, To] bits to Val for a uint32_t.
-static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
- uint32_t Val) {
+static inline void setSlice(unsigned &Bits, unsigned From, unsigned To,
+ unsigned Val) {
assert(From < 32 && To < 32 && From >= To);
uint32_t Mask = ((1 << (From - To + 1)) - 1);
Bits &= ~(Mask << To);
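The signature change above only swaps uint32_t for the equivalent unsigned so that callers holding unsigned lvalues can bind the reference. A round-trip sketch of slice()/setSlice() follows; the final OR-in of Val is not visible in the hunk and is reconstructed from the mask logic, so treat that line as an assumption.

// Round-trip sketch for slice()/setSlice().
#include <cassert>
#include <cstdint>
#include <cstdio>

static unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
  return (Bits >> To) & ((1u << (From - To + 1)) - 1);
}

static void setSlice(unsigned &Bits, unsigned From, unsigned To, unsigned Val) {
  assert(From < 32 && To < 32 && From >= To);
  uint32_t Mask = ((1 << (From - To + 1)) - 1);
  Bits &= ~(Mask << To);
  Bits |= (Val & Mask) << To; // reconstructed tail, not shown in the hunk
}

int main() {
  unsigned insn = 0;
  setSlice(insn, 7, 4, 0x5);         // write op7_4 = 5
  printf("%u\n", slice(insn, 7, 4)); // prints 5
  return 0;
}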
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 4b7a0bf6fdb91..112817b13cf90 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -103,7 +103,7 @@ static inline unsigned getT1Cond(uint32_t insn) {
}
static inline bool IsGPR(unsigned RegClass) {
- return RegClass == ARM::GPRRegClassID;
+ return RegClass == ARM::GPRRegClassID || RegClass == ARM::rGPRRegClassID;
}
// Utilities for 32-bit Thumb instructions.
@@ -220,7 +220,7 @@ static inline unsigned decodeImmShift(unsigned bits2, unsigned imm5,
switch (bits2) {
default: assert(0 && "No such value");
case 0:
- ShOp = ARM_AM::lsl;
+ ShOp = (imm5 == 0 ? ARM_AM::no_shift : ARM_AM::lsl);
return imm5;
case 1:
ShOp = ARM_AM::lsr;
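The fix above makes type-00 with imm5 == 0 decode as no_shift, since LSL #0 is just a plain register operand. Here is a sketch of only the two cases visible in this hunk; the LSR imm5 == 0 meaning a shift of 32 follows the ARM ARM and is an assumption here, and the remaining ASR/ROR cases are omitted.

// Sketch of the corrected type-00 handling in decodeImmShift().
#include <cstdio>

enum ShiftOpc { no_shift, lsl, lsr };

static unsigned decodeImmShift(unsigned bits2, unsigned imm5, ShiftOpc &ShOp) {
  switch (bits2) {
  case 0:
    ShOp = (imm5 == 0 ? no_shift : lsl); // LSL #0 is no shift at all
    return imm5;
  case 1:
    ShOp = lsr;
    return imm5 == 0 ? 32 : imm5; // per the ARM ARM; an assumption here
  default:
    ShOp = no_shift; // ASR/ROR cases omitted from this sketch
    return 0;
  }
}

int main() {
  ShiftOpc Op;
  unsigned Amt = decodeImmShift(0, 0, Op);
  printf("%d %u\n", Op == no_shift, Amt); // prints "1 0"
  return 0;
}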
@@ -1324,7 +1324,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
&& OpInfo[1].RegClass == ARM::GPRRegClassID
&& OpInfo[2].RegClass < 0
&& OpInfo[3].RegClass < 0
- && "Exactlt 4 operands expect and first two as reg operands");
+ && "Exactly 4 operands expect and first two as reg operands");
// Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
@@ -1338,17 +1338,20 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2
- && OpInfo[0].RegClass == ARM::GPRRegClassID
- && OpInfo[1].RegClass == ARM::GPRRegClassID
+ && (OpInfo[0].RegClass == ARM::GPRRegClassID ||
+ OpInfo[0].RegClass == ARM::rGPRRegClassID)
+ && (OpInfo[1].RegClass == ARM::GPRRegClassID ||
+ OpInfo[1].RegClass == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first two as reg operands");
- bool ThreeReg = (NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID);
+ bool ThreeReg = (NumOps > 2 && (OpInfo[2].RegClass == ARM::GPRRegClassID ||
+ OpInfo[2].RegClass == ARM::rGPRRegClassID));
bool NoDstReg = (decodeRs(insn) == 0xF);
// Build the register operands, followed by the constant shift specifier.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
+ getRegisterEnum(B, OpInfo[0].RegClass,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
@@ -1359,7 +1362,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MI.getOperand(Idx));
++OpIdx;
} else if (!NoDstReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[1].RegClass,
decodeRn(insn))));
++OpIdx;
} else {
@@ -1368,7 +1371,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
}
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass,
decodeRm(insn))));
++OpIdx;
@@ -1386,14 +1389,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned imm5 = getShiftAmtBits(insn);
ARM_AM::ShiftOpc ShOp = ARM_AM::no_shift;
unsigned ShAmt = decodeImmShift(bits2, imm5, ShOp);
-
- // PKHBT/PKHTB are special in that we need the decodeImmShift() call to
- // decode the shift amount from raw imm5 and bits2, but we DO NOT need
- // to encode the ShOp, as it's in the asm string already.
- if (Opcode == ARM::t2PKHBT || Opcode == ARM::t2PKHTB)
- MI.addOperand(MCOperand::CreateImm(ShAmt));
- else
- MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(ShOp, ShAmt)));
}
++OpIdx;
}
@@ -1416,16 +1412,20 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
OpIdx = 0;
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first one as reg operand");
- bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID);
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
bool NoDstReg = (decodeRs(insn) == 0xF);
// Build the register operands, followed by the modified immediate.
MI.addOperand(MCOperand::CreateReg(
- getRegisterEnum(B, ARM::GPRRegClassID,
+ getRegisterEnum(B, RdRegClassID,
NoDstReg ? decodeRn(insn) : decodeRs(insn))));
++OpIdx;
@@ -1434,7 +1434,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n");
return false;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
decodeRn(insn))));
++OpIdx;
}
@@ -1455,30 +1455,48 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
switch (Opcode) {
- case ARM::t2SSATlsl: case ARM::t2SSATasr: case ARM::t2SSAT16:
- case ARM::t2USATlsl: case ARM::t2USATasr: case ARM::t2USAT16:
+ case ARM::t2SSAT: case ARM::t2SSAT16:
+ case ARM::t2USAT: case ARM::t2USAT16:
return true;
default:
return false;
}
}
-static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
- switch (Opcode) {
- case ARM::t2SSATlsl:
- case ARM::t2SSATasr:
- return slice(insn, 4, 0) + 1;
- case ARM::t2SSAT16:
- return slice(insn, 3, 0) + 1;
- case ARM::t2USATlsl:
- case ARM::t2USATasr:
- return slice(insn, 4, 0);
- case ARM::t2USAT16:
- return slice(insn, 3, 0);
- default:
- assert(0 && "Unexpected opcode");
- return 0;
+/// DisassembleThumb2Sat - Disassemble Thumb2 saturate instructions:
+/// o t2SSAT, t2USAT: Rs sat_pos Rn shamt
+/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
+static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
+ unsigned &NumOpsAdded, BO B) {
+ const TargetInstrDesc &TID = ARMInsts[Opcode];
+ NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+
+ // Disassemble the register def.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRs(insn))));
+
+ unsigned Pos = slice(insn, 4, 0);
+ if (Opcode == ARM::t2SSAT || Opcode == ARM::t2SSAT16)
+ Pos += 1;
+ MI.addOperand(MCOperand::CreateImm(Pos));
+
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
+ decodeRn(insn))));
+
+ if (NumOpsAdded == 4) {
+ ARM_AM::ShiftOpc Opc = (slice(insn, 21, 21) != 0 ?
+ ARM_AM::asr : ARM_AM::lsl);
+ // Inst{14-12:7-6} encodes the imm5 shift amount.
+ unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
+ if (ShAmt == 0) {
+ if (Opc == ARM_AM::asr)
+ ShAmt = 32;
+ else
+ Opc = ARM_AM::no_shift;
+ }
+ MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShAmt)));
}
+ return true;
}
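DisassembleThumb2Sat above folds the old per-opcode position table into one rule: signed saturates store sat_pos - 1 in the encoding, so 1 is added back, and the optional shift operand turns asr #0 into asr #32 and lsl #0 into no_shift. A standalone sketch follows; the insn word in main() is fabricated purely to exercise the bit fields and is not a real Thumb2 encoding.

// Sketch of the saturate-operand decode.
#include <cstdint>
#include <cstdio>

static unsigned slice(uint32_t Bits, unsigned From, unsigned To) {
  return (Bits >> To) & ((1u << (From - To + 1)) - 1);
}

struct SatOps { unsigned Pos, ShAmt; bool Asr, NoShift; };

static SatOps decodeSat(uint32_t insn, bool Signed) {
  SatOps R;
  R.Pos = slice(insn, 4, 0) + (Signed ? 1 : 0); // SSAT encodes sat_pos - 1
  R.Asr = slice(insn, 21, 21) != 0;
  // Inst{14-12:7-6} is the imm5 shift amount.
  R.ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
  R.NoShift = false;
  if (R.ShAmt == 0) {
    if (R.Asr) R.ShAmt = 32;     // asr #0 means asr #32
    else       R.NoShift = true; // lsl #0 means no shift
  }
  return R;
}

int main() {
  SatOps R = decodeSat(0x00200007, /*Signed=*/true); // fabricated bits
  printf("pos=%u asr=%d shamt=%u\n", R.Pos, R.Asr, R.ShAmt); // pos=8 asr=1 shamt=32
  return 0;
}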
// A6.3.3 Data-processing (plain binary immediate)
@@ -1492,11 +1510,6 @@ static inline unsigned decodeThumb2SaturatePos(unsigned Opcode, uint32_t insn) {
// o t2SBFX (SBFX): Rs Rn lsb width
// o t2UBFX (UBFX): Rs Rn lsb width
// o t2BFI (BFI): Rs Rn lsb width
-//
-// [Signed|Unsigned] Saturate [16]
-//
-// o t2SSAT[lsl|asr], t2USAT[lsl|asr]: Rs sat_pos Rn shamt
-// o t2SSAT16, t2USAT16: Rs sat_pos Rn
static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
@@ -1506,41 +1519,21 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
OpIdx = 0;
- assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID
+ unsigned RdRegClassID = OpInfo[0].RegClass;
+ assert(NumOps >= 2 && (RdRegClassID == ARM::GPRRegClassID ||
+ RdRegClassID == ARM::rGPRRegClassID)
&& "Expect >= 2 operands and first one as reg operand");
- bool TwoReg = (OpInfo[1].RegClass == ARM::GPRRegClassID);
+ unsigned RnRegClassID = OpInfo[1].RegClass;
+ bool TwoReg = (RnRegClassID == ARM::GPRRegClassID
+ || RnRegClassID == ARM::rGPRRegClassID);
// Build the register operand(s), followed by the immediate(s).
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RdRegClassID,
decodeRs(insn))));
++OpIdx;
- // t2SSAT/t2SSAT16/t2USAT/t2USAT16 has imm operand after Rd.
- if (Thumb2SaturateOpcode(Opcode)) {
- MI.addOperand(MCOperand::CreateImm(decodeThumb2SaturatePos(Opcode, insn)));
-
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
- decodeRn(insn))));
-
- if (Opcode == ARM::t2SSAT16 || Opcode == ARM::t2USAT16) {
- OpIdx += 2;
- return true;
- }
-
- // For SSAT operand reg (Rn) has been disassembled above.
- // Now disassemble the shift amount.
-
- // Inst{14-12:7-6} encodes the imm5 shift amount.
- unsigned ShAmt = slice(insn, 14, 12) << 2 | slice(insn, 7, 6);
-
- MI.addOperand(MCOperand::CreateImm(ShAmt));
-
- OpIdx += 3;
- return true;
- }
-
if (TwoReg) {
assert(NumOps >= 3 && "Expect >= 3 operands");
int Idx;
@@ -1549,12 +1542,19 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MI.getOperand(Idx));
} else {
// Add src reg operand.
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
decodeRn(insn))));
}
++OpIdx;
}
+ if (Opcode == ARM::t2BFI) {
+ // Add val reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1567,7 +1567,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
- else if (Opcode == ARM::t2BFC) {
+ else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
uint32_t mask = 0;
if (getBitfieldInvMask(insn, mask))
MI.addOperand(MCOperand::CreateImm(mask));
@@ -1575,17 +1575,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
return false;
} else {
// Handle the case of: lsb width
- assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX ||
- Opcode == ARM::t2BFI) && "Unexpected opcode");
+ assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
+ && "Unexpected opcode");
MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
- if (Opcode == ARM::t2BFI) {
- if (getMsb(insn) < getLsb(insn)) {
- DEBUG(errs() << "Encoding error: msb < lsb\n");
- return false;
- }
- MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1));
- } else
- MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
+ MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
++OpIdx;
}
@@ -1618,8 +1611,8 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) {
// A8.6.26
// t2BXJ -> Rn
//
-// Miscellaneous control: t2Int_MemBarrierV7 (and its t2DMB variants),
-// t2Int_SyncBarrierV7 (and its t2DSB varianst), t2ISBsy, t2CLREX
+// Miscellaneous control: t2DMBsy (and its t2DMB variants),
+// t2DSBsy (and its t2DSB variants), t2ISBsy, t2CLREX
// -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants)
//
// Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV
@@ -1959,25 +1952,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
OpIdx = 0;
assert(NumOps >= 2 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
"Expect >= 2 operands and first two as reg operands");
// Build the register operands, followed by the optional rotation amount.
- bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID;
+ bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
++OpIdx;
if (ThreeReg) {
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
++OpIdx;
}
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
++OpIdx;
@@ -2009,26 +2002,26 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
"Expect >= 3 operands and first three as reg operands");
// Build the register operands.
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID;
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRd(insn))));
NumOpsAdded = FourReg ? 4 : 3;
@@ -2054,26 +2047,26 @@ static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
- OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == ARM::GPRRegClassID &&
- OpInfo[2].RegClass == ARM::GPRRegClassID &&
+ OpInfo[0].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[1].RegClass == ARM::rGPRRegClassID &&
+ OpInfo[2].RegClass == ARM::rGPRRegClassID &&
"Expect >= 3 operands and first three as reg operands");
- bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::GPRRegClassID;
+ bool FourReg = NumOps > 3 && OpInfo[3].RegClass == ARM::rGPRRegClassID;
// Build the register operands.
if (FourReg)
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRd(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRs(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRn(insn))));
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
decodeRm(insn))));
if (FourReg)
@@ -2152,22 +2145,20 @@ static bool DisassembleThumb2(uint16_t op1, uint16_t op2, uint16_t op,
break;
case 2:
if (op == 0) {
- if (slice(op2, 5, 5) == 0) {
+ if (slice(op2, 5, 5) == 0)
// Data-processing (modified immediate)
return DisassembleThumb2DPModImm(MI, Opcode, insn, NumOps, NumOpsAdded,
B);
- } else {
- // Data-processing (plain binary immediate)
- return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
- } else {
- // Branches and miscellaneous control on page A6-20.
- return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
- B);
- }
+ if (Thumb2SaturateOpcode(Opcode))
+ return DisassembleThumb2Sat(MI, Opcode, insn, NumOpsAdded, B);
- break;
+ // Data-processing (plain binary immediate)
+ return DisassembleThumb2DPBinImm(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
+ }
+ // Branches and miscellaneous control on page A6-20.
+ return DisassembleThumb2BrMiscCtrl(MI, Opcode, insn, NumOps, NumOpsAdded,
+ B);
case 3:
switch (slice(op2, 6, 5)) {
case 0:
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 9e3ff29e07c42..b3fcfaf6bda7d 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -14,10 +14,11 @@ TARGET = ARM
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
- ARMGenInstrInfo.inc ARMGenAsmWriter.inc \
+ ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
ARMGenDAGISel.inc ARMGenSubtarget.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
- ARMGenDecoderTables.inc ARMGenEDInfo.inc
+ ARMGenDecoderTables.inc ARMGenEDInfo.inc \
+ ARMGenFastISel.inc
DIRS = AsmPrinter AsmParser Disassembler TargetInfo
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index bbdd3c7f7c3e0..97e54bfaed9e7 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -24,7 +24,7 @@ STATISTIC(NumVMovs, "Number of reg-reg moves converted");
namespace {
struct NEONMoveFixPass : public MachineFunctionPass {
static char ID;
- NEONMoveFixPass() : MachineFunctionPass(&ID) {}
+ NEONMoveFixPass() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index f67717cdd56f5..3407ac6fe08ec 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -23,7 +23,7 @@ namespace {
public:
static char ID;
- NEONPreAllocPass() : MachineFunctionPass(&ID) {}
+ NEONPreAllocPass() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -51,13 +51,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
default:
break;
- case ARM::VLD1q8:
- case ARM::VLD1q16:
- case ARM::VLD1q32:
- case ARM::VLD1q64:
- case ARM::VLD2d8:
- case ARM::VLD2d16:
- case ARM::VLD2d32:
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32:
@@ -65,13 +58,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
- case ARM::VLD2q8:
- case ARM::VLD2q16:
- case ARM::VLD2q32:
- FirstOpnd = 0;
- NumRegs = 4;
- return true;
-
case ARM::VLD2LNq16:
case ARM::VLD2LNq32:
FirstOpnd = 0;
@@ -88,10 +74,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VLD3d8:
- case ARM::VLD3d16:
- case ARM::VLD3d32:
- case ARM::VLD1d64T:
case ARM::VLD3LNd8:
case ARM::VLD3LNd16:
case ARM::VLD3LNd32:
@@ -99,24 +81,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 3;
return true;
- case ARM::VLD3q8_UPD:
- case ARM::VLD3q16_UPD:
- case ARM::VLD3q32_UPD:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD3q8odd_UPD:
- case ARM::VLD3q16odd_UPD:
- case ARM::VLD3q32odd_UPD:
- FirstOpnd = 0;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VLD3LNq16:
case ARM::VLD3LNq32:
FirstOpnd = 0;
@@ -133,10 +97,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VLD4d8:
- case ARM::VLD4d16:
- case ARM::VLD4d32:
- case ARM::VLD1d64Q:
case ARM::VLD4LNd8:
case ARM::VLD4LNd16:
case ARM::VLD4LNd32:
@@ -144,24 +104,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VLD4q8_UPD:
- case ARM::VLD4q16_UPD:
- case ARM::VLD4q32_UPD:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VLD4q8odd_UPD:
- case ARM::VLD4q16odd_UPD:
- case ARM::VLD4q32odd_UPD:
- FirstOpnd = 0;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VLD4LNq16:
case ARM::VLD4LNq32:
FirstOpnd = 0;
@@ -178,13 +120,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST1q8:
- case ARM::VST1q16:
- case ARM::VST1q32:
- case ARM::VST1q64:
- case ARM::VST2d8:
- case ARM::VST2d16:
- case ARM::VST2d32:
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
@@ -192,13 +127,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 2;
return true;
- case ARM::VST2q8:
- case ARM::VST2q16:
- case ARM::VST2q32:
- FirstOpnd = 2;
- NumRegs = 4;
- return true;
-
case ARM::VST2LNq16:
case ARM::VST2LNq32:
FirstOpnd = 2;
@@ -215,10 +143,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST3d8:
- case ARM::VST3d16:
- case ARM::VST3d32:
- case ARM::VST1d64T:
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
@@ -226,24 +150,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 3;
return true;
- case ARM::VST3q8_UPD:
- case ARM::VST3q16_UPD:
- case ARM::VST3q32_UPD:
- FirstOpnd = 4;
- NumRegs = 3;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST3q8odd_UPD:
- case ARM::VST3q16odd_UPD:
- case ARM::VST3q32odd_UPD:
- FirstOpnd = 4;
- NumRegs = 3;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VST3LNq16:
case ARM::VST3LNq32:
FirstOpnd = 2;
@@ -260,10 +166,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
- case ARM::VST4d8:
- case ARM::VST4d16:
- case ARM::VST4d32:
- case ARM::VST1d64Q:
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
@@ -271,24 +173,6 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
NumRegs = 4;
return true;
- case ARM::VST4q8_UPD:
- case ARM::VST4q16_UPD:
- case ARM::VST4q32_UPD:
- FirstOpnd = 4;
- NumRegs = 4;
- Offset = 0;
- Stride = 2;
- return true;
-
- case ARM::VST4q8odd_UPD:
- case ARM::VST4q16odd_UPD:
- case ARM::VST4q32odd_UPD:
- FirstOpnd = 4;
- NumRegs = 4;
- Offset = 1;
- Stride = 2;
- return true;
-
case ARM::VST4LNq16:
case ARM::VST4LNq32:
FirstOpnd = 2;
@@ -468,7 +352,34 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
continue;
if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
continue;
- llvm_unreachable("expected a REG_SEQUENCE");
+
+ MachineBasicBlock::iterator NextI = llvm::next(MBBI);
+ for (unsigned R = 0; R < NumRegs; ++R) {
+ MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "expected a virtual register");
+
+ // For now, just assign a fixed set of adjacent registers.
+ // This leaves plenty of room for future improvements.
+ static const unsigned NEONDRegs[] = {
+ ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7
+ };
+ MO.setReg(NEONDRegs[Offset + R * Stride]);
+
+ if (MO.isUse()) {
+ // Insert a copy from VirtReg.
+ BuildMI(MBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),MO.getReg())
+ .addReg(VirtReg, getKillRegState(MO.isKill()));
+ MO.setIsKill();
+ } else if (MO.isDef() && !MO.isDead()) {
+ // Add a copy to VirtReg.
+ BuildMI(MBB, NextI, DebugLoc(), TII->get(TargetOpcode::COPY), VirtReg)
+ .addReg(MO.getReg());
+ }
+ }
}
return Modified;
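Rather than asserting when FormsRegSequence() fails, the pass now pins operand R of a NumRegs-wide NEON op to the fixed register NEONDRegs[Offset + R * Stride] and bridges to the old virtual registers with COPY instructions. A sketch of the index mapping:

// Operand R lands on NEONDRegs[Offset + R * Stride]. E.g. a 3-register
// "odd" variant with Offset = 1, Stride = 2 uses D1, D3, D5.
#include <cstdio>

static const char *NEONDRegs[] = {"D0", "D1", "D2", "D3",
                                  "D4", "D5", "D6", "D7"};

int main() {
  unsigned NumRegs = 3, Offset = 1, Stride = 2;
  for (unsigned R = 0; R < NumRegs; ++R)
    printf("operand %u -> %s\n", R, NEONDRegs[Offset + R * Stride]);
  return 0;
}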
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 0cb8ff01181d7..9fc3fb92cb2c1 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -611,27 +611,6 @@ constant which was already loaded). Not sure what's necessary to do that.
//===---------------------------------------------------------------------===//
-Given the following on ARMv7:
-int test1(int A, int B) {
- return (A&-8388481)|(B&8388480);
-}
-
-We currently generate:
- bfc r0, #7, #16
- movw r2, #:lower16:8388480
- movt r2, #:upper16:8388480
- and r1, r1, r2
- orr r0, r1, r0
- bx lr
-
-The following is much shorter:
- lsr r1, r1, #7
- bfi r0, r1, #7, #16
- bx lr
-
-
-//===---------------------------------------------------------------------===//
-
The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
int a(int x) { return __builtin_bswap32(x); }
@@ -657,3 +636,24 @@ A custom Thumb version would also be a slight improvement over the generic
version.
//===---------------------------------------------------------------------===//
+
+Consider the following simple C code:
+
+void foo(unsigned char *a, unsigned char *b, int *c) {
+ if ((*a | *b) == 0) *c = 0;
+}
+
+Currently llvm-gcc generates something like this (nice branchless code, I'd say):
+
+ ldrb r0, [r0]
+ ldrb r1, [r1]
+ orr r0, r1, r0
+ tst r0, #255
+ moveq r0, #0
+ streq r0, [r2]
+ bx lr
+
+Note that both "tst" and "moveq" are redundant.
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 39b70b43b23f5..a21a3da10bdad 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -68,7 +68,7 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
}
-bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool Thumb1RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
@@ -363,107 +363,19 @@ static void removeOperands(MachineInstr &MI, unsigned i) {
MI.RemoveOperand(Op);
}
-int Thumb1RegisterInfo::
-rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const
-{
- // if/when eliminateFrameIndex() conforms with ARMBaseRegisterInfo
- // version then can pull out Thumb1 specific parts here
- return 0;
-}
-
-/// saveScavengerRegister - Spill the register so it can be used by the
-/// register scavenger. Return true.
-bool
-Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator &UseMI,
- const TargetRegisterClass *RC,
- unsigned Reg) const {
- // Thumb1 can't use the emergency spill slot on the stack because
- // ldr/str immediate offsets must be positive, and if we're referencing
- // off the frame pointer (if, for example, there are alloca() calls in
- // the function, the offset will be negative. Use R12 instead since that's
- // a call clobbered register that we know won't be used in Thumb1 mode.
- DebugLoc DL;
- BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
- addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
-
- // The UseMI is where we would like to restore the register. If there's
- // interference with R12 before then, however, we'll need to restore it
- // before that instead and adjust the UseMI.
- bool done = false;
- for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
- if (II->isDebugValue())
- continue;
- // If this instruction affects R12, adjust our restore point.
- for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = II->getOperand(i);
- if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
- if (MO.getReg() == ARM::R12) {
- UseMI = II;
- done = true;
- break;
- }
- }
- }
- // Restore the register from R12
- BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
- addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
-
- return true;
-}
-
-unsigned
-Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const{
- unsigned VReg = 0;
- unsigned i = 0;
+bool Thumb1RegisterInfo::
+rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc dl = MI.getDebugLoc();
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
-
- unsigned FrameReg = ARM::SP;
- int FrameIndex = MI.getOperand(i).getIndex();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MF.getFrameInfo()->getStackSize() + SPAdj;
-
- if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea1Offset();
- else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
- Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (MF.getFrameInfo()->hasVarSizedObjects()) {
- assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
- // There are alloca()'s in this function, must reference off the frame
- // pointer instead.
- FrameReg = getFrameRegister(MF);
- Offset -= AFI->getFramePtrSpillOffset();
- }
-
- // Special handling of dbg_value instructions.
- if (MI.isDebugValue()) {
- MI.getOperand(i). ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
- }
-
unsigned Opcode = MI.getOpcode();
const TargetInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
if (Opcode == ARM::tADDrSPi) {
- Offset += MI.getOperand(i+1).getImm();
+ Offset += MI.getOperand(FrameRegIdx+1).getImm();
// Can't use tADDrSPi if it's based off the frame pointer.
unsigned NumBits = 0;
@@ -483,12 +395,13 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
// Turn it into a move.
MI.setDesc(TII.get(ARM::tMOVgpr2tgpr));
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset and remaining explicit predicate operands.
- do MI.RemoveOperand(i+1);
- while (MI.getNumOperands() > i+1 &&
- (!MI.getOperand(i+1).isReg() || !MI.getOperand(i+1).isImm()));
- return 0;
+ do MI.RemoveOperand(FrameRegIdx+1);
+ while (MI.getNumOperands() > FrameRegIdx+1 &&
+ (!MI.getOperand(FrameRegIdx+1).isReg() ||
+ !MI.getOperand(FrameRegIdx+1).isImm()));
+ return true;
}
// Common case: small offset, fits into instruction.
@@ -496,15 +409,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (((Offset / Scale) & ~Mask) == 0) {
// Replace the FrameIndex with sp / fp
if (Opcode == ARM::tADDi3) {
- removeOperands(MI, i);
+ removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg)
.addImm(Offset / Scale));
} else {
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Offset / Scale);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset / Scale);
}
- return 0;
+ return true;
}
unsigned DestReg = MI.getOperand(0).getReg();
@@ -516,7 +429,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII,
*this, dl);
MBB.erase(II);
- return 0;
+ return true;
}
if (Offset > 0) {
@@ -524,12 +437,12 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// r0 = add sp, 255*4
// r0 = add r0, (imm - 255*4)
if (Opcode == ARM::tADDi3) {
- removeOperands(MI, i);
+ removeOperands(MI, FrameRegIdx);
MachineInstrBuilder MIB(&MI);
AddDefaultPred(AddDefaultT1CC(MIB).addReg(FrameReg).addImm(Mask));
} else {
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
- MI.getOperand(i+1).ChangeToImmediate(Mask);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Mask);
}
Offset = (Offset - Mask * Scale);
MachineBasicBlock::iterator NII = llvm::next(II);
@@ -542,14 +455,14 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
emitThumbConstant(MBB, II, DestReg, Offset, TII, *this, dl);
MI.setDesc(TII.get(ARM::tADDhirr));
- MI.getOperand(i).ChangeToRegister(DestReg, false, false, true);
- MI.getOperand(i+1).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(DestReg, false, false, true);
+ MI.getOperand(FrameRegIdx+1).ChangeToRegister(FrameReg, false);
if (Opcode == ARM::tADDi3) {
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
}
- return 0;
+ return true;
} else {
unsigned ImmIdx = 0;
int InstrOffs = 0;
@@ -557,7 +470,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Scale = 1;
switch (AddrMode) {
case ARMII::AddrModeT1_s: {
- ImmIdx = i+1;
+ ImmIdx = FrameRegIdx+1;
InstrOffs = MI.getOperand(ImmIdx).getImm();
NumBits = (FrameReg == ARM::SP) ? 8 : 5;
Scale = 4;
@@ -577,9 +490,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Mask = (1 << NumBits) - 1;
if ((unsigned)Offset <= Mask * Scale) {
// Replace the FrameIndex with sp
- MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
ImmOp.ChangeToImmediate(ImmedOffset);
- return 0;
+ return true;
}
bool isThumSpillRestore = Opcode == ARM::tRestore || Opcode == ARM::tSpill;
@@ -600,12 +513,126 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset &= ~(Mask*Scale);
}
}
+ return Offset == 0;
+}
+
+void
+Thumb1RegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const {
+ MachineInstr &MI = *I;
+ int Off = Offset; // ARM doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = rewriteFrameIndex(MI, i, BaseReg, Off, TII);
+ assert(Done && "Unable to resolve frame index!");
+}
+
+/// saveScavengerRegister - Spill the register so it can be used by the
+/// register scavenger. Return true.
+bool
+Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator &UseMI,
+ const TargetRegisterClass *RC,
+ unsigned Reg) const {
+ // Thumb1 can't use the emergency spill slot on the stack because
+ // ldr/str immediate offsets must be positive, and if we're referencing
+ // off the frame pointer (if, for example, there are alloca() calls in
+ // the function, the offset will be negative. Use R12 instead since that's
+ // a call clobbered register that we know won't be used in Thumb1 mode.
+ DebugLoc DL;
+ BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
+ addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
+
+ // The UseMI is where we would like to restore the register. If there's
+ // interference with R12 before then, however, we'll need to restore it
+ // before that instead and adjust the UseMI.
+ bool done = false;
+ for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
+ // If this instruction affects R12, adjust our restore point.
+ for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = II->getOperand(i);
+ if (!MO.isReg() || MO.isUndef() || !MO.getReg() ||
+ TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ if (MO.getReg() == ARM::R12) {
+ UseMI = II;
+ done = true;
+ break;
+ }
+ }
+ }
+ // Restore the register from R12
+ BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
+
+ return true;
+}
+
+void
+Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS) const {
+ unsigned VReg = 0;
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ DebugLoc dl = MI.getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned FrameReg = ARM::SP;
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ MF.getFrameInfo()->getStackSize() + SPAdj;
+
+ if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea1Offset();
+ else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
+ Offset -= AFI->getGPRCalleeSavedArea2Offset();
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
+ // pointer or base pointer instead.
+ if (!hasBasePointer(MF)) {
+ FrameReg = getFrameRegister(MF);
+ Offset -= AFI->getFramePtrSpillOffset();
+ } else
+ FrameReg = BasePtr;
+ }
+
+ // Special handling of dbg_value instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ assert(AFI->isThumbFunction() &&
+ "This eliminateFrameIndex only supports Thumb1!");
+ if (rewriteFrameIndex(MI, i, FrameReg, Offset, TII))
+ return;
// If we get here, the immediate doesn't fit into the instruction. We folded
// as much as possible above, handle the rest, providing a register that is
// SP+LargeImm.
assert(Offset && "This code isn't needed if offset already handled!");
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = MI.getDesc();
+
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
if (PIdx != -1)
@@ -637,11 +664,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.addOperand(MachineOperand::CreateReg(0, false));
} else if (Desc.mayStore()) {
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
- assert (Value && "Frame index virtual allocated, but Value arg is NULL!");
bool UseRR = false;
- bool TrackVReg = true;
- Value->first = FrameReg; // use the frame register as a kind indicator
- Value->second = Offset;
if (Opcode == ARM::tSpill) {
if (FrameReg == ARM::SP)
@@ -650,7 +673,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else {
emitLoadConstPool(MBB, II, dl, VReg, 0, Offset);
UseRR = true;
- TrackVReg = false;
}
} else
emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII,
@@ -661,8 +683,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
else // tSTR has an extra register operand.
MI.addOperand(MachineOperand::CreateReg(0, false));
- if (!ReuseFrameIndexVals || !TrackVReg)
- VReg = 0;
} else
assert(false && "Unexpected opcode!");
@@ -671,7 +691,6 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstrBuilder MIB(&MI);
AddDefaultPred(MIB);
}
- return VReg;
}
void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
@@ -742,11 +761,11 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
dl = MBBI->getDebugLoc();
}
- // Darwin ABI requires FP to point to the stack slot that contains the
- // previous FP.
- if (STI.isTargetDarwin() || hasFP(MF)) {
+ // Adjust FP so it points to the stack slot that contains the previous FP.
+ if (hasFP(MF)) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0);
+ AFI->setShouldRestoreSPFromFP(true);
}
// Determine starting offsets of spill areas.
@@ -764,14 +783,20 @@ void Thumb1RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, TII, dl, *this, -NumBytes);
}
- if (STI.isTargetELF() && hasFP(MF)) {
+ if (STI.isTargetELF() && hasFP(MF))
MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
- }
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
+
+ // If we need a base pointer, set it up here. It's whatever the value
+ // of the stack pointer is at this point. Any variable size objects
+ // will be allocated after this, so we can still use the base pointer
+ // to reference locals.
+ if (hasBasePointer(MF))
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
}
static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
@@ -828,7 +853,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedAreaSize());
- if (hasFP(MF)) {
+ if (AFI->shouldRestoreSPFromFP()) {
NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// Reset SP based on frame pointer only if the stack frame extends beyond
// frame pointer stack slot or target is ELF and the function has FP.
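In the restructuring above, rewriteFrameIndex() now reports leftover work through the Offset reference and a bool instead of returning a residual. Its common-case fit test is ((Offset / Scale) & ~Mask) == 0. The sketch below also folds in the scale-alignment requirement that the real code handles separately, using the AddrModeT1_s values (NumBits = 8, Scale = 4 for SP-relative accesses):

// Sketch of the "small offset fits" test: SP-relative Thumb1 loads/stores
// can encode offsets 0, 4, ..., 1020.
#include <cstdio>

static bool offsetFits(int Offset, unsigned NumBits, unsigned Scale) {
  unsigned Mask = (1u << NumBits) - 1;
  return Offset >= 0 && Offset % Scale == 0 &&
         (((unsigned)Offset / Scale) & ~Mask) == 0;
}

int main() {
  printf("%d %d %d\n",
         offsetFits(1020, 8, 4),  // 1 : fits in the instruction
         offsetFits(1024, 8, 4),  // 0 : needs an extra add
         offsetFits(6, 8, 4));    // 0 : not scale-aligned
  return 0;
}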
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 9a0308afa20c2..c578054a5d71f 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -38,27 +38,27 @@ public:
unsigned PredReg = 0) const;
/// Code Generation virtual methods...
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- // rewrite MI to access 'Offset' bytes from the FP. Return the offset that
- // could not be handled directly in MI.
- int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc,
- unsigned SUBriOpc) const;
-
+ // rewrite MI to access 'Offset' bytes from the FP. Update Offset to be
+ // however much remains to be handled. Return 'true' if no further
+ // work is required.
+ bool rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const ARMBaseInstrInfo &TII) const;
+ void resolveFrameIndex(MachineBasicBlock::iterator I,
+ unsigned BaseReg, int64_t Offset) const;
bool saveScavengerRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator &UseMI,
const TargetRegisterClass *RC,
unsigned Reg) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index cd15bbed9f23e..45e693744b806 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -27,7 +27,7 @@ namespace {
public:
static char ID;
- Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
+ Thumb2ITBlockPass() : MachineFunctionPass(ID) {}
const Thumb2InstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -91,35 +91,53 @@ static void TrackDefUses(MachineInstr *MI,
}
}
+static bool isCopy(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case ARM::MOVr:
+ case ARM::MOVr_TC:
+ case ARM::tMOVr:
+ case ARM::tMOVgpr2tgpr:
+ case ARM::tMOVtgpr2gpr:
+ case ARM::tMOVgpr2gpr:
+ case ARM::t2MOVr:
+ return true;
+ }
+}
+
bool
Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
SmallSet<unsigned, 4> &Defs,
SmallSet<unsigned, 4> &Uses) {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
- "Sub-register indices still around?");
- // llvm models select's as two-address instructions. That means a copy
- // is inserted before a t2MOVccr, etc. If the copy is scheduled in
- // between selects we would end up creating multiple IT blocks.
-
- // First check if it's safe to move it.
- if (Uses.count(DstReg) || Defs.count(SrcReg))
- return false;
-
- // Then peek at the next instruction to see if it's predicated on CC or OCC.
- // If not, then there is nothing to be gained by moving the copy.
- MachineBasicBlock::iterator I = MI; ++I;
- MachineBasicBlock::iterator E = MI->getParent()->end();
- while (I != E && I->isDebugValue())
- ++I;
- if (I != E) {
- unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
- if (NCC == CC || NCC == OCC)
- return true;
- }
+ if (!isCopy(MI))
+ return false;
+ // llvm models select's as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+ assert(MI->getOperand(0).getSubReg() == 0 &&
+ MI->getOperand(1).getSubReg() == 0 &&
+ "Sub-register indices still around?");
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
}
return false;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index ee517279c9d79..442f41da8a2d9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -147,8 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -173,8 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
- RC == ARM::tcGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass || RC == ARM::rGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ba392f36d9464..0c3962dd123d8 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -173,7 +173,7 @@ namespace {
char Thumb2SizeReduce::ID = 0;
}
-Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(&ID) {
+Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
unsigned FromOpc = ReduceTable[i].WideOpc;
if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
@@ -315,6 +315,18 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm());
if (!isARMLowRegister(BaseReg) || Mode != ARM_AM::ia)
return false;
+ // For the non-writeback version (this one), the base register must be
+ // one of the registers being loaded.
+ bool isOK = false;
+ for (unsigned i = 4; i < MI->getNumOperands(); ++i) {
+ if (MI->getOperand(i).getReg() == BaseReg) {
+ isOK = true;
+ break;
+ }
+ }
+ if (!isOK)
+ return false;
+
OpNum = 0;
isLdStMul = true;
break;
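The new guard above reflects 16-bit LDM semantics: tLDMIA without writeback only exists when the base register is also among the loaded registers, so the reduction scans the register list, starting at operand 4 (past base, mode, and predicate operands), for BaseReg. A sketch of that scan with fabricated operand values:

// Sketch of the base-in-list check gating the t2LDMIA -> tLDMIA reduction.
#include <cstdio>

static bool baseInList(unsigned BaseReg, const unsigned *Ops, unsigned NumOps) {
  for (unsigned i = 4; i < NumOps; ++i)
    if (Ops[i] == BaseReg)
      return true;
  return false;
}

int main() {
  unsigned Ops[] = {1, 0, 14, 0, 1, 2, 3}; // loads r1-r3, base r1 (fabricated)
  printf("%d\n", baseInList(1, Ops, 7));   // prints 1: reduction allowed
  return 0;
}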
diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp
index 001656e0121a7..3768117095369 100644
--- a/lib/Target/Alpha/AlphaBranchSelector.cpp
+++ b/lib/Target/Alpha/AlphaBranchSelector.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
namespace {
struct AlphaBSel : public MachineFunctionPass {
static char ID;
- AlphaBSel() : MachineFunctionPass(&ID) {}
+ AlphaBSel() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &Fn);
diff --git a/lib/Target/Alpha/AlphaCodeEmitter.cpp b/lib/Target/Alpha/AlphaCodeEmitter.cpp
index a6c6f52704f6b..3aec07035d74c 100644
--- a/lib/Target/Alpha/AlphaCodeEmitter.cpp
+++ b/lib/Target/Alpha/AlphaCodeEmitter.cpp
@@ -34,7 +34,7 @@ namespace {
public:
static char ID;
- AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(&ID),
+ AlphaCodeEmitter(JITCodeEmitter &mce) : MachineFunctionPass(ID),
MCE(mce) {}
/// getBinaryCodeForInstr - This function, generated by the
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index d526dc0827b26..d197bd15ef9c5 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -113,8 +113,8 @@ namespace {
static uint64_t getNearPower2(uint64_t x) {
if (!x) return 0;
unsigned at = CountLeadingZeros_64(x);
- uint64_t complow = 1 << (63 - at);
- uint64_t comphigh = 1 << (64 - at);
+ uint64_t complow = 1ULL << (63 - at);
+ uint64_t comphigh = 1ULL << (64 - at);
//cerr << x << ":" << complow << ":" << comphigh << "\n";
if (abs64(complow - x) <= abs64(comphigh - x))
return complow;
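The 1 to 1ULL change above fixes a 32-bit shift bug: for small values of at, 63 - at exceeds 31, and shifting the int literal 1 that far is undefined behavior, typically losing the high bits. A minimal demonstration:

// Why the literal must be 64-bit before shifting.
#include <cstdint>
#include <cstdio>

int main() {
  unsigned at = 1;                   // as if CountLeadingZeros_64 returned 1
  // uint64_t bad = 1 << (63 - at);  // UB: int shifted by 62
  uint64_t good = 1ULL << (63 - at); // 0x4000000000000000
  printf("%llx\n", (unsigned long long)good);
  return 0;
}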
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index ad625a2694172..5a2f5610fdb49 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -27,32 +27,6 @@ AlphaInstrInfo::AlphaInstrInfo()
RI(*this) { }
-bool AlphaInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg, unsigned& destReg,
- unsigned& SrcSR, unsigned& DstSR) const {
- unsigned oc = MI.getOpcode();
- if (oc == Alpha::BISr ||
- oc == Alpha::CPYSS ||
- oc == Alpha::CPYST ||
- oc == Alpha::CPYSSt ||
- oc == Alpha::CPYSTs) {
- // or r1, r2, r2
- // cpys(s|t) r1 r2 r2
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid Alpha BIS instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- SrcSR = DstSR = 0;
- return true;
- }
- }
- return false;
-}
-
unsigned
AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index e20e8323b64e6..ee6077a4a01a1 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -30,12 +30,6 @@ public:
///
virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp
index 34be470f03e30..85fbfd1affe21 100644
--- a/lib/Target/Alpha/AlphaLLRP.cpp
+++ b/lib/Target/Alpha/AlphaLLRP.cpp
@@ -39,7 +39,7 @@ namespace {
static char ID;
AlphaLLRPPass(AlphaTargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm) { }
+ : MachineFunctionPass(ID), TM(tm) { }
virtual const char *getPassName() const {
return "Alpha NOP inserter";
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index dc9d935ec047b..327ddb4d9a720 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -137,10 +137,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
//variable locals
//<- SP
-unsigned
+void
AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -185,7 +184,6 @@ AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else {
MI.getOperand(i).ChangeToImmediate(Offset);
}
- return 0;
}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index f9fd87a637374..b164979a63119 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -38,9 +38,8 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
//void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 9f4aff6fd5f53..5428cb96173b2 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -53,8 +53,6 @@ namespace {
void printOp(const MachineOperand &MO, raw_ostream &O);
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printBaseOffsetPair(const MachineInstr *MI, int i, raw_ostream &O,
- bool brackets=true);
virtual void EmitFunctionBodyStart();
virtual void EmitFunctionBodyEnd();
void EmitStartOfAsmFile(Module &M);
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index a74d42d595496..e50d57a31b6ef 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -28,34 +28,6 @@ BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
RI(ST, *this),
Subtarget(ST) {}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-bool BlackfinInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg,
- unsigned &DstReg,
- unsigned &SrcSR,
- unsigned &DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
- switch (MI.getOpcode()) {
- case BF::MOVE:
- case BF::MOVE_ncccc:
- case BF::MOVE_ccncc:
- case BF::MOVECC_zext:
- case BF::MOVECC_nz:
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- case BF::SLL16i:
- if (MI.getOperand(2).getImm()!=0)
- return false;
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- default:
- return false;
- }
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index 6c3591707269f..fdc1029da5883 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -30,10 +30,6 @@ namespace llvm {
/// always be able to get register info as well (through this method).
virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; }
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
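The isMoveInstr hook removed here disappears again below for CellSPU and MBlaze: plain register-to-register moves are now expected to be represented as target-independent COPY instructions. A minimal sketch of the replacement query, assuming the MachineInstr::isCopy() API present in this tree:

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // COPY always carries the destination as operand 0 and the source as
    // operand 1, so no per-target opcode switch is needed any more.
    static bool getCopyOperands(const MachineInstr &MI,
                                unsigned &SrcReg, unsigned &DstReg) {
      if (!MI.isCopy())
        return false;
      DstReg = MI.getOperand(0).getReg();
      SrcReg = MI.getOperand(1).getReg();
      return true;
    }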
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 06e95de1587f0..a51831263e909 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -190,10 +190,9 @@ static unsigned findScratchRegister(MachineBasicBlock::iterator II,
return Reg;
}
-unsigned
+void
BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
@@ -230,20 +229,20 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.setDesc(TII.get(isStore
? BF::STORE32p_uimm6m4
: BF::LOAD32p_uimm6m4));
- return 0;
+ return;
}
if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32fp_nimm7m4
: BF::LOAD32fp_nimm7m4));
MI.getOperand(FIPos+1).setImm(-Offset);
- return 0;
+ return;
}
if (isInt<18>(Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32p_imm18m4
: BF::LOAD32p_imm18m4));
- return 0;
+ return;
}
// Use RegScavenger to calculate proper offset...
MI.dump();
@@ -328,7 +327,6 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
llvm_unreachable("Cannot eliminate frame index");
break;
}
- return 0;
}
void BlackfinRegisterInfo::
@@ -344,10 +342,6 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
}
-void BlackfinRegisterInfo::
-processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
-}
-
// Emit a prologue that sets up a stack frame.
// On function entry, R0-R2 and P0 may hold arguments.
// R3, P1, and P2 may be used as scratch registers
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index ead0b4a73c832..bb83c34f80032 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -51,15 +51,12 @@ namespace llvm {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;
- void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
-
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index e8d8474b5be86..270fff6064adc 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -73,7 +73,7 @@ namespace {
public:
static char ID;
CBackendNameAllUsedStructsAndMergeFunctions()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<FindUsedTypes>();
}
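The ModulePass(&ID) to ModulePass(ID) change (repeated below in CppBackend and the MBlaze delay-slot filler) follows the pass constructors now taking the identity by reference rather than by address. A minimal pass skeleton under the new convention, as a sketch:

    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
      struct ExamplePass : public FunctionPass {
        static char ID;                      // the address is still the unique pass identity
        ExamplePass() : FunctionPass(ID) {}  // ID passed by reference, no longer &ID
        virtual bool runOnFunction(Function &F) { return false; }
      };
    }
    char ExamplePass::ID = 0;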
@@ -110,7 +110,7 @@ namespace {
public:
static char ID;
explicit CWriter(formatted_raw_ostream &o)
- : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0),
+ : FunctionPass(ID), Out(o), IL(0), Mang(0), LI(0),
TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0),
NextAnonValueNumber(0) {
FPCounter = 0;
@@ -199,7 +199,6 @@ namespace {
void lowerIntrinsics(Function &F);
- void printModule(Module *M);
void printModuleTypes(const TypeSymbolTable &ST);
void printContainedStructs(const Type *Ty, std::set<const Type *> &);
void printFloatingPointConstants(Function &F);
@@ -1300,6 +1299,13 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
}
std::string CWriter::GetValueName(const Value *Operand) {
+
+ // Resolve potential alias.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Operand)) {
+ if (const Value *V = GA->resolveAliasedGlobal(false))
+ Operand = V;
+ }
+
// Mangle globals with the standard mangler interface for LLC compatibility.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(Operand)) {
SmallString<128> Str;
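Without this step a GlobalAlias reaching GetValueName would be printed under its own name, for which the C writer never emits a definition. A sketch of the effect; the IR in the comment is illustrative, and the flag semantics (false = keep resolving through weak aliases) are our reading of the resolveAliasedGlobal API:

    // Given IR such as
    //   @impl  = internal global i32 0
    //   @alias = alias i32* @impl
    // any use of @alias should print @impl's mangled name instead.
    if (const llvm::GlobalAlias *GA = llvm::dyn_cast<llvm::GlobalAlias>(Operand))
      if (const llvm::Value *Target = GA->resolveAliasedGlobal(false))
        Operand = Target; // fall through to the normal mangling path below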
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index ec2f663908f68..04fa2ae866d69 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -1,4 +1,4 @@
-//===- SPUCallingConv.td - Calling Conventions for CellSPU ------*- C++ -*-===//
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,16 +19,17 @@ class CCIfSubtarget<string F, CCAction A>
// Return Value Calling Convention
//===----------------------------------------------------------------------===//
-// Return-value convention for Cell SPU: Everything can be passed back via $3:
+// Return-value convention for Cell SPU: return values are passed in registers R3-R74
def RetCC_SPU : CallingConv<[
- CCIfType<[i8], CCAssignToReg<[R3]>>,
- CCIfType<[i16], CCAssignToReg<[R3]>>,
- CCIfType<[i32], CCAssignToReg<[R3]>>,
- CCIfType<[i64], CCAssignToReg<[R3]>>,
- CCIfType<[i128], CCAssignToReg<[R3]>>,
- CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>,
- CCIfType<[v2i32], CCAssignToReg<[R3]>>
+ CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
]>;
@@ -45,8 +46,7 @@ def CCC_SPU : CallingConv<[
R39, R40, R41, R42, R43, R44, R45, R46, R47,
R48, R49, R50, R51, R52, R53, R54, R55, R56,
R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9b8c2ddd06359..2f1598441f5ae 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -41,13 +41,6 @@ using namespace llvm;
namespace {
//! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
bool
- isI64IntS10Immediate(ConstantSDNode *CN)
- {
- return isInt<10>(CN->getSExtValue());
- }
-
- //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
- bool
isI32IntS10Immediate(ConstantSDNode *CN)
{
return isInt<10>(CN->getSExtValue());
@@ -67,14 +60,6 @@ namespace {
return isInt<10>(CN->getSExtValue());
}
- //! SDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntS10Immediate(SDNode *N)
- {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- return (CN != 0 && isI16IntS10Immediate(CN));
- }
-
//! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
bool
isI16IntU10Immediate(ConstantSDNode *CN)
@@ -82,14 +67,6 @@ namespace {
return isUInt<10>((short) CN->getZExtValue());
}
- //! SDNode predicate for i16 sign-extended, 10-bit immediate values
- bool
- isI16IntU10Immediate(SDNode *N)
- {
- return (N->getOpcode() == ISD::Constant
- && isI16IntU10Immediate(cast<ConstantSDNode>(N)));
- }
-
//! ConstantSDNode predicate for signed 16-bit values
/*!
\arg CN The constant SelectionDAG node holding the value
@@ -119,14 +96,6 @@ namespace {
return false;
}
- //! SDNode predicate for signed 16-bit values.
- bool
- isIntS16Immediate(SDNode *N, short &Imm)
- {
- return (N->getOpcode() == ISD::Constant
- && isIntS16Immediate(cast<ConstantSDNode>(N), Imm));
- }
-
//! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
static bool
isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
@@ -142,16 +111,6 @@ namespace {
return false;
}
- bool
- isHighLow(const SDValue &Op)
- {
- return (Op.getOpcode() == SPUISD::IndirectAddr
- && ((Op.getOperand(0).getOpcode() == SPUISD::Hi
- && Op.getOperand(1).getOpcode() == SPUISD::Lo)
- || (Op.getOperand(0).getOpcode() == SPUISD::Lo
- && Op.getOperand(1).getOpcode() == SPUISD::Hi)));
- }
-
//===------------------------------------------------------------------===//
//! EVT to "useful stuff" mapping structure:
@@ -607,7 +566,8 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
return true;
} else if (Opc == ISD::Register
||Opc == ISD::CopyFromReg
- ||Opc == ISD::UNDEF) {
+ ||Opc == ISD::UNDEF
+ ||Opc == ISD::Constant) {
unsigned OpOpc = Op->getOpcode();
if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index ece19b9b89f6d..46f31899be0c4 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -426,9 +426,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
- // "Odd size" vector classes that we're willing to support:
- addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
-
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
@@ -751,7 +748,6 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
if (alignment == 16) {
ConstantSDNode *CN;
-
// Special cases for a known aligned load to simplify the base pointer
// and insertion byte:
if (basePtr.getOpcode() == ISD::ADD
@@ -775,6 +771,9 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
basePtr,
DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
}
} else {
// Unaligned load: must be more pessimistic about addressing modes:
@@ -811,8 +810,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
DAG.getConstant(0, PtrVT));
}
- // Re-emit as a v16i8 vector load
- alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
+ // Load the memory to which to store.
+ alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
SN->getSrcValue(), SN->getSrcValueOffset(),
SN->isVolatile(), SN->isNonTemporal(), 16);
@@ -843,10 +842,10 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
}
#endif
- SDValue insertEltOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
- SDValue vectorizeOp =
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
vectorizeOp, alignLoadVec,
@@ -1325,41 +1324,23 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (Ins.empty())
return Chain;
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
// If the call has results, copy the values out of the ret val registers.
- switch (Ins[0].VT.getSimpleVT().SimpleTy) {
- default: llvm_unreachable("Unexpected ret value!");
- case MVT::Other: break;
- case MVT::i32:
- if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
- MVT::i32, InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- Chain.getValue(2)).getValue(1);
- InVals.push_back(Chain.getValue(0));
- } else {
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- }
- break;
- case MVT::i8:
- case MVT::i16:
- case MVT::i64:
- case MVT::i128:
- case MVT::f32:
- case MVT::f64:
- case MVT::v2f64:
- case MVT::v2i64:
- case MVT::v4f32:
- case MVT::v4i32:
- case MVT::v8i16:
- case MVT::v16i8:
- Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
- InFlag).getValue(1);
- InVals.push_back(Chain.getValue(0));
- break;
- }
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
return Chain;
}
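This swaps the hand-written switch for the generated calling-convention tables: AnalyzeCallResult assigns each incoming value a register, and the loop threads Chain and InFlag through one CopyFromReg per location. For symmetry, a sketch of the callee-side pattern most targets pair with this (Outs, OutVals, and Flag as in the LowerReturn signature of this tree):

    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
                   *DAG.getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_SPU);          // same tables, outgoing side
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RVLocs[i].getLocReg(),
                               OutVals[i], Flag);   // copy each value into its register
      Flag = Chain.getValue(1);                     // glue successive copies together
    }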
@@ -1621,10 +1602,6 @@ LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
}
- case MVT::v2i32: {
- SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
- }
case MVT::v2i64: {
return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
}
@@ -1748,11 +1725,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If we have a single element being moved from V1 to V2, this can be handled
// using the C*[DX] compute mask instructions, but the vector elements have
- // to be monotonically increasing with one exception element.
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
EVT VecVT = V1.getValueType();
EVT EltVT = VecVT.getVectorElementType();
unsigned EltsFromV2 = 0;
- unsigned V2Elt = 0;
+ unsigned V2EltOffset = 0;
unsigned V2EltIdx0 = 0;
unsigned CurrElt = 0;
unsigned MaxElts = VecVT.getVectorNumElements();
@@ -1785,9 +1763,13 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (monotonic) {
if (SrcElt >= V2EltIdx0) {
- if (1 >= (++EltsFromV2)) {
- V2Elt = (V2EltIdx0 - SrcElt) << 2;
- }
+ // TODO: optimize for the monotonic case when several consecutive
+ // elements are taken from V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
} else if (CurrElt != SrcElt) {
monotonic = false;
}
@@ -1823,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// R1 ($sp) is used here only as it is guaranteed to have last bits zero
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(V2Elt, MVT::i32));
+ DAG.getConstant(V2EltOffset, MVT::i32));
SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
maskVT, Pointer);
@@ -1847,7 +1829,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
for (unsigned j = 0; j < BytesPerElement; ++j)
ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
}
-
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
&ResultMask[0], ResultMask.size());
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
@@ -1997,7 +1978,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
// Variable index: Rotate the requested element into slot 0, then replicate
// slot 0 across the vector
EVT VecVT = N.getValueType();
- if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
"vector type!");
}
@@ -2072,21 +2053,25 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
SDValue IdxOp = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
// use 0 when the lane to insert to is 'undef'
- int64_t Idx=0;
+ int64_t Offset=0;
if (IdxOp.getOpcode() != ISD::UNDEF) {
ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
- Idx = (CN->getSExtValue());
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
}
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(Idx, PtrVT));
- SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
+ DAG.getConstant(Offset, PtrVT));
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
SDValue result =
DAG.getNode(SPUISD::SHUFB, dl, VT,
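The insertion index is now scaled to a byte offset, so lanes narrower than 32 bits land in the right place; previously the raw element index was used as the byte offset. A compile-time spot check of the arithmetic (C++11 static_assert, used here only for illustration):

    // Offset = Idx * eltVT.getSizeInBits() / 8
    static_assert(3 * 16 / 8 == 6, "v8i16 lane 3 starts 6 bytes into the vector");
    static_assert(2 * 32 / 8 == 8, "v4i32 lane 2 starts 8 bytes in");
    static_assert(1 * 64 / 8 == 8, "v2i64 lane 1 starts 8 bytes in");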
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 69aa0887bd77f..26d6b4f25ef12 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -54,148 +54,6 @@ SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
RI(*TM.getSubtargetImpl(), *this)
{ /* NOP */ }
-bool
-SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg,
- unsigned& destReg,
- unsigned& SrcSR, unsigned& DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- switch (MI.getOpcode()) {
- default:
- break;
- case SPU::ORIv4i32:
- case SPU::ORIr32:
- case SPU::ORHIv8i16:
- case SPU::ORHIr16:
- case SPU::ORHIi8i16:
- case SPU::ORBIv16i8:
- case SPU::ORBIr8:
- case SPU::ORIi16i32:
- case SPU::ORIi8i32:
- case SPU::AHIvec:
- case SPU::AHIr16:
- case SPU::AIv4i32:
- assert(MI.getNumOperands() == 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid SPU ORI/ORHI/ORBI/AHI/AI/SFI/SFHI instruction!");
- if (MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- case SPU::AIr32:
- assert(MI.getNumOperands() == 3 &&
- "wrong number of operands to AIr32");
- if (MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- (MI.getOperand(2).isImm() &&
- MI.getOperand(2).getImm() == 0)) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- case SPU::LRr8:
- case SPU::LRr16:
- case SPU::LRr32:
- case SPU::LRf32:
- case SPU::LRr64:
- case SPU::LRf64:
- case SPU::LRr128:
- case SPU::LRv16i8:
- case SPU::LRv8i16:
- case SPU::LRv4i32:
- case SPU::LRv4f32:
- case SPU::LRv2i64:
- case SPU::LRv2f64:
- case SPU::ORv16i8_i8:
- case SPU::ORv8i16_i16:
- case SPU::ORv4i32_i32:
- case SPU::ORv2i64_i64:
- case SPU::ORv4f32_f32:
- case SPU::ORv2f64_f64:
- case SPU::ORi8_v16i8:
- case SPU::ORi16_v8i16:
- case SPU::ORi32_v4i32:
- case SPU::ORi64_v2i64:
- case SPU::ORf32_v4f32:
- case SPU::ORf64_v2f64:
-/*
- case SPU::ORi128_r64:
- case SPU::ORi128_f64:
- case SPU::ORi128_r32:
- case SPU::ORi128_f32:
- case SPU::ORi128_r16:
- case SPU::ORi128_r8:
-*/
- case SPU::ORi128_vec:
-/*
- case SPU::ORr64_i128:
- case SPU::ORf64_i128:
- case SPU::ORr32_i128:
- case SPU::ORf32_i128:
- case SPU::ORr16_i128:
- case SPU::ORr8_i128:
-*/
- case SPU::ORvec_i128:
-/*
- case SPU::ORr16_r32:
- case SPU::ORr8_r32:
- case SPU::ORf32_r32:
- case SPU::ORr32_f32:
- case SPU::ORr32_r16:
- case SPU::ORr32_r8:
- case SPU::ORr16_r64:
- case SPU::ORr8_r64:
- case SPU::ORr64_r16:
- case SPU::ORr64_r8:
-*/
- case SPU::ORr64_r32:
- case SPU::ORr32_r64:
- case SPU::ORf32_r32:
- case SPU::ORr32_f32:
- case SPU::ORf64_r64:
- case SPU::ORr64_f64: {
- assert(MI.getNumOperands() == 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid SPU OR<type>_<vec> or LR instruction!");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- break;
- }
- case SPU::ORv16i8:
- case SPU::ORv8i16:
- case SPU::ORv4i32:
- case SPU::ORv2i64:
- case SPU::ORr8:
- case SPU::ORr16:
- case SPU::ORr32:
- case SPU::ORr64:
- case SPU::ORr128:
- case SPU::ORf32:
- case SPU::ORf64:
- assert(MI.getNumOperands() == 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid SPU OR(vec|r32|r64|gprc) instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- break;
- }
-
- return false;
-}
-
unsigned
SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index fbb1733181486..191e55d0ca612 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -32,12 +32,6 @@ namespace llvm {
///
virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index a7fb14c26a76c..ca0fe00e37f89 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -62,8 +62,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadDFormVec<v4f32>;
def v2f64: LoadDFormVec<v2f64>;
- def v2i32: LoadDFormVec<v2i32>;
-
def r128: LoadDForm<GPRC>;
def r64: LoadDForm<R64C>;
def r32: LoadDForm<R32C>;
@@ -96,8 +94,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadAFormVec<v4f32>;
def v2f64: LoadAFormVec<v2f64>;
- def v2i32: LoadAFormVec<v2i32>;
-
def r128: LoadAForm<GPRC>;
def r64: LoadAForm<R64C>;
def r32: LoadAForm<R32C>;
@@ -130,8 +126,6 @@ let canFoldAsLoad = 1 in {
def v4f32: LoadXFormVec<v4f32>;
def v2f64: LoadXFormVec<v2f64>;
- def v2i32: LoadXFormVec<v2i32>;
-
def r128: LoadXForm<GPRC>;
def r64: LoadXForm<R64C>;
def r32: LoadXForm<R32C>;
@@ -180,8 +174,6 @@ multiclass StoreDForms
def v4f32: StoreDFormVec<v4f32>;
def v2f64: StoreDFormVec<v2f64>;
- def v2i32: StoreDFormVec<v2i32>;
-
def r128: StoreDForm<GPRC>;
def r64: StoreDForm<R64C>;
def r32: StoreDForm<R32C>;
@@ -212,8 +204,6 @@ multiclass StoreAForms
def v4f32: StoreAFormVec<v4f32>;
def v2f64: StoreAFormVec<v2f64>;
- def v2i32: StoreAFormVec<v2i32>;
-
def r128: StoreAForm<GPRC>;
def r64: StoreAForm<R64C>;
def r32: StoreAForm<R32C>;
@@ -246,8 +236,6 @@ multiclass StoreXForms
def v4f32: StoreXFormVec<v4f32>;
def v2f64: StoreXFormVec<v2f64>;
- def v2i32: StoreXFormVec<v2i32>;
-
def r128: StoreXForm<GPRC>;
def r64: StoreXForm<R64C>;
def r32: StoreXForm<R32C>;
@@ -607,7 +595,6 @@ class ARegInst<RegisterClass rclass>:
multiclass AddInstruction {
def v4i32: AVecInst<v4i32>;
def v16i8: AVecInst<v16i8>;
-
def r32: ARegInst<R32C>;
}
@@ -672,6 +659,7 @@ def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+
def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
[(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
@@ -1448,6 +1436,9 @@ class ORCvtGPRCVec:
class ORCvtVecGPRC:
ORCvtForm<(outs GPRC:$rT), (ins VECREG:$rA)>;
+class ORCvtVecVec:
+ ORCvtForm<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
multiclass BitwiseOr
{
def v16i8: ORVecInst<v16i8>;
@@ -3894,6 +3885,79 @@ multiclass SFPSub
defm FS : SFPSub;
+class FMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01100011010, OOL, IOL,
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ pattern>;
+
+class FMVecInst<ValueType type>:
+ FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (type VECREG:$rT),
+ (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
+
+multiclass SFPMul
+{
+ def v4f32: FMVecInst<v4f32>;
+ def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FM : SFPMul;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Multiply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+
+
+
// Floating point reciprocal estimate
class FRESTInst<dag OOL, dag IOL>:
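The FM/FMA/FMS/FNMS definitions are moved up here (the old copies go away in the following hunk), with FM rewritten via a multiclass; the selected fmul/fadd/fsub patterns are the same. For reference, the scalar identities these patterns implement, checked numerically as a sketch:

    #include <cassert>
    int main() {
      float a = 2.0f, b = 3.0f, c = 10.0f;
      assert(a * b     ==  6.0f);  // fm:   a * b
      assert(c + a * b == 16.0f);  // fma:  c + (a * b)
      assert(a * b - c == -4.0f);  // fms:  (a * b) - c
      assert(c - a * b ==  4.0f);  // fnms: c - (a * b), i.e. -(a*b - c)
      return 0;
    }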
@@ -4019,72 +4083,6 @@ def FSCRRf32 :
// status and control register read
//--------------------------------------
-// Floating point multiply instructions
-//--------------------------------------
-
-def FMv4f32:
- RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
- "fm\t$rT, $rA, $rB", SPrecFP,
- [(set (v4f32 VECREG:$rT), (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB)))]>;
-
-def FMf32 :
- RRForm<0b01100011010, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
- "fm\t$rT, $rA, $rB", SPrecFP,
- [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
-
-// Floating point multiply and add
-// e.g. d = c + (a * b)
-def FMAv4f32:
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fadd (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
-
-def FMAf32:
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fma\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-// FP multiply and subtract
-// Subtracts value in rC from product
-// res = a * b - c
-def FMSv4f32 :
- RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
- (v4f32 VECREG:$rC)))]>;
-
-def FMSf32 :
- RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT,
- (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
-
-// Floating Negative Mulitply and Subtract
-// Subtracts product from value in rC
-// res = fneg(fms a b c)
-// = - (a * b - c)
-// = c - a * b
-// NOTE: subtraction order
-// fsub a b = a - b
-// fs a b = b - a?
-def FNMSf32 :
- RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
-
-def FNMSv4f32 :
- RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
- "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
- [(set (v4f32 VECREG:$rT),
- (fsub (v4f32 VECREG:$rC),
- (fmul (v4f32 VECREG:$rA),
- (v4f32 VECREG:$rB))))]>;
-
-//--------------------------------------
// Floating Point Conversions
// Signed conversions:
def CSiFv4f32:
diff --git a/lib/Target/CellSPU/SPUOperands.td b/lib/Target/CellSPU/SPUOperands.td
index 6216651e48a40..e1a0358abc46f 100644
--- a/lib/Target/CellSPU/SPUOperands.td
+++ b/lib/Target/CellSPU/SPUOperands.td
@@ -98,12 +98,6 @@ def immU8 : PatLeaf<(imm), [{
return (N->getZExtValue() <= 0xff);
}]>;
-// i64ImmSExt10 predicate - True if the i64 immediate fits in a 10-bit sign
-// extended field. Used by RI10Form instructions like 'ldq'.
-def i64ImmSExt10 : PatLeaf<(imm), [{
- return isI64IntS10Immediate(N);
-}]>;
-
// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
// extended field. Used by RI10Form instructions like 'ldq'.
def i32ImmSExt10 : PatLeaf<(imm), [{
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index f7cfa42f2a95a..cf718917a5616 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -270,9 +270,8 @@ SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
MBB.erase(I);
}
-unsigned
+void
SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value,
RegScavenger *RS) const
{
unsigned i = 0;
@@ -328,7 +327,6 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
} else {
MO.ChangeToImmediate(Offset);
}
- return 0;
}
/// determineFrameLayout - Determine the size of the frame and maximum call
@@ -417,7 +415,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
if (hasDebugInfo) {
// Mark effective beginning of when frame pointer becomes valid.
FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
}
// Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
@@ -476,7 +474,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
// Mark effective beginning of when frame pointer is ready.
MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL)).addSym(ReadyLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
MachineLocation FPDst(SPU::R1);
MachineLocation FPSrc(MachineLocation::VirtualFP);
@@ -491,7 +489,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
dl = MBBI->getDebugLoc();
// Insert terminator label
- BuildMI(MBB, MBBI, dl, TII.get(SPU::DBG_LABEL))
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL))
.addSym(MMI.getContext().CreateTempSymbol());
}
}
@@ -587,6 +585,7 @@ SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
case SPU::LQDr32: return SPU::LQXr32;
case SPU::LQDr128: return SPU::LQXr128;
case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4i32: return SPU::LQXv4i32;
case SPU::LQDv4f32: return SPU::LQXv4f32;
case SPU::STQDr32: return SPU::STQXr32;
case SPU::STQDr128: return SPU::STQXr128;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 7a6ae6d43c7ed..aedb769cb4fc8 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -63,9 +63,8 @@ namespace llvm {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
//! Convert frame indices into machine operands
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS = NULL) const;
//! Determine the frame's layout
void determineFrameLayout(MachineFunction &MF) const;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
index bb88f2bf9a298..3e8f0979256af 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -394,7 +394,7 @@ def R8C : RegisterClass<"SPU", [i8], 128,
// The SPU's registers as vector registers:
def VECREG : RegisterClass<"SPU",
- [v16i8,v8i16,v2i32,v4i32,v4f32,v2i64,v2f64],
+ [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64],
128,
[
/* volatile register */
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 145568adcd4af..f08559f6e9f2e 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -104,7 +104,7 @@ namespace {
public:
static char ID;
explicit CppWriter(formatted_raw_ostream &o) :
- ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
+ ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
virtual const char *getPassName() const { return "C++ backend"; }
@@ -288,6 +288,8 @@ void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
Out << "GlobalValue::LinkerPrivateLinkage"; break;
case GlobalValue::LinkerPrivateWeakLinkage:
Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakDefAutoLinkage"; break;
case GlobalValue::AvailableExternallyLinkage:
Out << "GlobalValue::AvailableExternallyLinkage "; break;
case GlobalValue::LinkOnceAnyLinkage:
@@ -471,14 +473,22 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
HANDLE_ATTR(Nest);
HANDLE_ATTR(ReadNone);
HANDLE_ATTR(ReadOnly);
- HANDLE_ATTR(InlineHint);
HANDLE_ATTR(NoInline);
HANDLE_ATTR(AlwaysInline);
HANDLE_ATTR(OptimizeForSize);
HANDLE_ATTR(StackProtect);
HANDLE_ATTR(StackProtectReq);
HANDLE_ATTR(NoCapture);
+ HANDLE_ATTR(NoRedZone);
+ HANDLE_ATTR(NoImplicitFloat);
+ HANDLE_ATTR(Naked);
+ HANDLE_ATTR(InlineHint);
#undef HANDLE_ATTR
+ if (attrs & Attribute::StackAlignment)
+ Out << " | Attribute::constructStackAlignmentFromInt("
+ << Attribute::getStackAlignmentFromAttrs(attrs)
+ << ")";
+ attrs &= ~Attribute::StackAlignment;
assert(attrs == 0 && "Unhandled attribute!");
Out << ";";
nl(Out);
@@ -1404,7 +1414,8 @@ void CppWriter::printInstruction(const Instruction *I,
nl(Out);
}
Out << "CallInst* " << iName << " = CallInst::Create("
- << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), "
+ << opNames[call->getNumArgOperands()] << ", "
+ << iName << "_params.begin(), "
<< iName << "_params.end(), \"";
} else if (call->getNumArgOperands() == 1) {
Out << "CallInst* " << iName << " = CallInst::Create("
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index b6e4d654f4aa7..f4b30ad271f16 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -65,11 +65,8 @@ namespace {
void printFSLImm(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier = 0);
- void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
void printSavedRegsBitmask(raw_ostream &OS);
- const char *emitCurrentABIString();
void emitFrameDirective();
void printInstruction(const MachineInstr *MI, raw_ostream &O);
@@ -292,13 +289,6 @@ printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
printOperand(MI, opNum, O);
}
-void MBlazeAsmPrinter::
-printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier) {
- const MachineOperand& MO = MI->getOperand(opNum);
- O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm());
-}
-
// Force static initialization.
extern "C" void LLVMInitializeMBlazeAsmPrinter() {
RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
index 482ddd3963fba..3815b6d0a398d 100644
--- a/lib/Target/MBlaze/MBlaze.td
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -1,4 +1,4 @@
-//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===//
+//===- MBlaze.td - Describe the MBlaze Target Machine ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td
index ddd49980e0a29..8622e0d74bcd5 100644
--- a/lib/Target/MBlaze/MBlazeCallingConv.td
+++ b/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -1,4 +1,4 @@
-//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===//
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 42fea25073327..b551b79b291ed 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -32,7 +32,7 @@ namespace {
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
return "MBlaze Delay Slot Filler";
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
index c7cd5f4e44a98..e64dd0e3e2c3b 100644
--- a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -219,7 +219,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) {
// Operand is a result from an ADD.
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (Predicate_immSExt16(CN)) {
+ if (isUInt<16>(CN->getZExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
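The hand-rolled Predicate_immSExt16 call gives way to the MathExtras templates already used in the CellSPU predicates above; the new test accepts exactly the unsigned 16-bit range. A quick numeric check, as a sketch:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      assert(isUInt<16>(0) && isUInt<16>(65535)); // in range
      assert(!isUInt<16>(65536));                 // one past the top
      assert(isInt<10>(-512) && !isInt<10>(512)); // the signed form used above
      return 0;
    }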
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
index a48a8c9723537..657b1d4940a70 100644
--- a/lib/Target/MBlaze/MBlazeInstrFPU.td
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----------*- C++ -*-===//
+//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
index b59999e76ae58..51584111e6661 100644
--- a/lib/Target/MBlaze/MBlazeInstrFSL.td
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----------*- C++ -*-===//
+//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs -----*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
index 7d655433d4e4d..28e8e4402225b 100644
--- a/lib/Target/MBlaze/MBlazeInstrFormats.td
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===//
+//===- MBlazeInstrFormats.td - MB Instruction defs ---------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 6ff5825a26b6e..b590c090e095a 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -30,41 +30,6 @@ static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-bool MBlazeInstrInfo::
-isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- // add $dst, $src, $zero || addu $dst, $zero, $src
- // or $dst, $src, $zero || or $dst, $zero, $src
- if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) {
- if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- return true;
- } else if (MI.getOperand(2).isReg() &&
- MI.getOperand(2).getReg() == MBlaze::R0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- // addi $dst, $src, 0
- // ori $dst, $src, 0
- if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) {
- if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index f0743705f010e..b3dba0ec768c0 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -173,12 +173,6 @@ public:
///
virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
index 3c406dda0591b..e5d153474a7e5 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.td
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===//
+//===- MBlazeInstrInfo.td - MBlaze Instruction defs --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
index 82552fa4b343a..a27cb5ba0dc4f 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsics.td
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -17,17 +17,11 @@
// MBlaze intrinsic classes.
let TargetPrefix = "mblaze", isTarget = 1 in {
- class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty],
- [llvm_i32_ty],
- [IntrWriteMem]>;
+ class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], []>;
- class MBFSL_Put_Intrinsic : Intrinsic<[],
- [llvm_i32_ty, llvm_i32_ty],
- [IntrWriteMem]>;
+ class MBFSL_Put_Intrinsic : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], []>;
- class MBFSL_PutT_Intrinsic : Intrinsic<[],
- [llvm_i32_ty],
- [IntrWriteMem]>;
+ class MBFSL_PutT_Intrinsic : Intrinsic<[], [llvm_i32_ty], []>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 8cafa8c519c62..22b6a30470d17 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -242,9 +242,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// FrameIndex represents objects inside an abstract stack.
// We must replace FrameIndex with a stack/frame pointer
// direct reference.
-unsigned MBlazeRegisterInfo::
+void MBlazeRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value, RegScavenger *RS) const {
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
@@ -277,7 +277,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MI.getOperand(oi).ChangeToImmediate(Offset);
MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
- return 0;
}
void MBlazeRegisterInfo::
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index af97b0e2d79ec..1e1fde14ab7b8 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -63,9 +63,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
MachineBasicBlock::iterator I) const;
/// Stack Frame Processing Methods
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index d0a1e7556c439..5e935103389e6 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MBlazeRegisterInfo.td - MBlaze Register defs -------------*- C++ -*-===//
+//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
index 1fec9e694005c..4a65542a447c6 100644
--- a/lib/Target/MBlaze/MBlazeSchedule.td
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -1,4 +1,4 @@
-//===- MBlazeSchedule.td - MBlaze Scheduling Definitions --------*- C++ -*-===//
+//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/MSIL/CMakeLists.txt b/lib/Target/MSIL/CMakeLists.txt
deleted file mode 100644
index b1d47ef05ec5e..0000000000000
--- a/lib/Target/MSIL/CMakeLists.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-add_llvm_target(MSIL
- MSILWriter.cpp
- )
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
deleted file mode 100644
index cc350e8a4f892..0000000000000
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ /dev/null
@@ -1,1706 +0,0 @@
-//===-- MSILWriter.cpp - Library for converting LLVM code to MSIL ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This library converts LLVM code to MSIL code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSILWriter.h"
-#include "llvm/CallingConv.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Intrinsics.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/Analysis/ConstantsScanner.h"
-#include "llvm/Support/CallSite.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/CodeGen/Passes.h"
-using namespace llvm;
-
-namespace llvm {
- // TargetMachine for the MSIL
- struct MSILTarget : public TargetMachine {
- MSILTarget(const Target &T, const std::string &TT, const std::string &FS)
- : TargetMachine(T) {}
-
- virtual bool addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
-
- virtual const TargetData *getTargetData() const { return 0; }
- };
-}
-
-extern "C" void LLVMInitializeMSILTarget() {
- // Register the target.
- RegisterTargetMachine<MSILTarget> X(TheMSILTarget);
-}
-
-bool MSILModule::runOnModule(Module &M) {
- ModulePtr = &M;
- TD = &getAnalysis<TargetData>();
- bool Changed = false;
- // Find named types.
- TypeSymbolTable& Table = M.getTypeSymbolTable();
- std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes();
- for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) {
- if (!I->second->isStructTy() && !I->second->isOpaqueTy())
- Table.remove(I++);
- else {
- std::set<const Type *>::iterator T = Types.find(I->second);
- if (T==Types.end())
- Table.remove(I++);
- else {
- Types.erase(T);
- ++I;
- }
- }
- }
- // Find unnamed types.
- unsigned RenameCounter = 0;
- for (std::set<const Type *>::const_iterator I = Types.begin(),
- E = Types.end(); I!=E; ++I)
- if (const StructType *STy = dyn_cast<StructType>(*I)) {
- while (ModulePtr->addTypeName("unnamed$"+utostr(RenameCounter), STy))
- ++RenameCounter;
- Changed = true;
- }
- // Pointer for FunctionPass.
- UsedTypes = &getAnalysis<FindUsedTypes>().getTypes();
- return Changed;
-}
-
-char MSILModule::ID = 0;
-char MSILWriter::ID = 0;
-
-bool MSILWriter::runOnFunction(Function &F) {
- if (F.isDeclaration()) return false;
-
- // Do not codegen any 'available_externally' functions at all, they have
- // definitions outside the translation unit.
- if (F.hasAvailableExternallyLinkage())
- return false;
-
- LInfo = &getAnalysis<LoopInfo>();
- printFunction(F);
- return false;
-}
-
-
-bool MSILWriter::doInitialization(Module &M) {
- ModulePtr = &M;
- Out << ".assembly extern mscorlib {}\n";
- Out << ".assembly MSIL {}\n\n";
- Out << "// External\n";
- printExternals();
- Out << "// Declarations\n";
- printDeclarations(M.getTypeSymbolTable());
- Out << "// Definitions\n";
- printGlobalVariables();
- Out << "// Startup code\n";
- printModuleStartup();
- return false;
-}
-
-
-bool MSILWriter::doFinalization(Module &M) {
- return false;
-}
-
-
-void MSILWriter::printModuleStartup() {
- Out <<
- ".method static public int32 $MSIL_Startup() {\n"
- "\t.entrypoint\n"
- "\t.locals (native int i)\n"
- "\t.locals (native int argc)\n"
- "\t.locals (native int ptr)\n"
- "\t.locals (void* argv)\n"
- "\t.locals (string[] args)\n"
- "\tcall\tstring[] [mscorlib]System.Environment::GetCommandLineArgs()\n"
- "\tdup\n"
- "\tstloc\targs\n"
- "\tldlen\n"
- "\tconv.i4\n"
- "\tdup\n"
- "\tstloc\targc\n";
- printPtrLoad(TD->getPointerSize());
- Out <<
- "\tmul\n"
- "\tlocalloc\n"
- "\tstloc\targv\n"
- "\tldc.i4.0\n"
- "\tstloc\ti\n"
- "L_01:\n"
- "\tldloc\ti\n"
- "\tldloc\targc\n"
- "\tceq\n"
- "\tbrtrue\tL_02\n"
- "\tldloc\targs\n"
- "\tldloc\ti\n"
- "\tldelem.ref\n"
- "\tcall\tnative int [mscorlib]System.Runtime.InteropServices.Marshal::"
- "StringToHGlobalAnsi(string)\n"
- "\tstloc\tptr\n"
- "\tldloc\targv\n"
- "\tldloc\ti\n";
- printPtrLoad(TD->getPointerSize());
- Out <<
- "\tmul\n"
- "\tadd\n"
- "\tldloc\tptr\n"
- "\tstind.i\n"
- "\tldloc\ti\n"
- "\tldc.i4.1\n"
- "\tadd\n"
- "\tstloc\ti\n"
- "\tbr\tL_01\n"
- "L_02:\n"
- "\tcall void $MSIL_Init()\n";
-
- // Call user 'main' function.
- const Function* F = ModulePtr->getFunction("main");
- if (!F || F->isDeclaration()) {
- Out << "\tldc.i4.0\n\tret\n}\n";
- return;
- }
- bool BadSig = true;
- std::string Args("");
- Function::const_arg_iterator Arg1,Arg2;
-
- switch (F->arg_size()) {
- case 0:
- BadSig = false;
- break;
- case 1:
- Arg1 = F->arg_begin();
- if (Arg1->getType()->isIntegerTy()) {
- Out << "\tldloc\targc\n";
- Args = getTypeName(Arg1->getType());
- BadSig = false;
- }
- break;
- case 2:
- Arg1 = Arg2 = F->arg_begin(); ++Arg2;
- if (Arg1->getType()->isIntegerTy() &&
- Arg2->getType()->getTypeID() == Type::PointerTyID) {
- Out << "\tldloc\targc\n\tldloc\targv\n";
- Args = getTypeName(Arg1->getType())+","+getTypeName(Arg2->getType());
- BadSig = false;
- }
- break;
- default:
- BadSig = true;
- }
-
- bool RetVoid = (F->getReturnType()->getTypeID() == Type::VoidTyID);
- if (BadSig || (!F->getReturnType()->isIntegerTy() && !RetVoid)) {
- Out << "\tldc.i4.0\n";
- } else {
- Out << "\tcall\t" << getTypeName(F->getReturnType()) <<
- getConvModopt(F->getCallingConv()) << "main(" << Args << ")\n";
- if (RetVoid)
- Out << "\tldc.i4.0\n";
- else
- Out << "\tconv.i4\n";
- }
- Out << "\tret\n}\n";
-}
-
-bool MSILWriter::isZeroValue(const Value* V) {
- if (const Constant *C = dyn_cast<Constant>(V))
- return C->isNullValue();
- return false;
-}
-
-
-std::string MSILWriter::getValueName(const Value* V) {
- std::string Name;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- Name = GV->getName();
- else {
- unsigned &No = AnonValueNumbers[V];
- if (No == 0) No = ++NextAnonValueNumber;
- Name = "tmp" + utostr(No);
- }
-
- // Name into the quotes allow control and space characters.
- return "'"+Name+"'";
-}
-
-
-std::string MSILWriter::getLabelName(const std::string& Name) {
- if (Name.find('.')!=std::string::npos) {
- std::string Tmp(Name);
- // Replace unaccepable characters in the label name.
- for (std::string::iterator I = Tmp.begin(), E = Tmp.end(); I!=E; ++I)
- if (*I=='.') *I = '@';
- return Tmp;
- }
- return Name;
-}
-
-
-std::string MSILWriter::getLabelName(const Value* V) {
- std::string Name;
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- Name = GV->getName();
- else {
- unsigned &No = AnonValueNumbers[V];
- if (No == 0) No = ++NextAnonValueNumber;
- Name = "tmp" + utostr(No);
- }
-
- return getLabelName(Name);
-}
-
-
-std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) {
- switch (CallingConvID) {
- case CallingConv::C:
- case CallingConv::Cold:
- case CallingConv::Fast:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvCdecl) ";
- case CallingConv::X86_FastCall:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) ";
- case CallingConv::X86_StdCall:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
- case CallingConv::X86_ThisCall:
- return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) ";
- default:
- errs() << "CallingConvID = " << CallingConvID << '\n';
- llvm_unreachable("Unsupported calling convention");
- }
- return ""; // Not reached
-}
-
-
-std::string MSILWriter::getArrayTypeName(Type::TypeID TyID, const Type* Ty) {
- std::string Tmp = "";
- const Type* ElemTy = Ty;
- assert(Ty->getTypeID()==TyID && "Invalid type passed");
- // Walk trought array element types.
- for (;;) {
- // Multidimensional array.
- if (ElemTy->getTypeID()==TyID) {
- if (const ArrayType* ATy = dyn_cast<ArrayType>(ElemTy))
- Tmp += utostr(ATy->getNumElements());
- else if (const VectorType* VTy = dyn_cast<VectorType>(ElemTy))
- Tmp += utostr(VTy->getNumElements());
- ElemTy = cast<SequentialType>(ElemTy)->getElementType();
- }
- // Base element type found.
- if (ElemTy->getTypeID()!=TyID) break;
- Tmp += ",";
- }
- return getTypeName(ElemTy, false, true)+"["+Tmp+"]";
-}
-
-
-std::string MSILWriter::getPrimitiveTypeName(const Type* Ty, bool isSigned) {
- unsigned NumBits = 0;
- switch (Ty->getTypeID()) {
- case Type::VoidTyID:
- return "void ";
- case Type::IntegerTyID:
- NumBits = getBitWidth(Ty);
- if(NumBits==1)
- return "bool ";
- if (!isSigned)
- return "unsigned int"+utostr(NumBits)+" ";
- return "int"+utostr(NumBits)+" ";
- case Type::FloatTyID:
- return "float32 ";
- case Type::DoubleTyID:
- return "float64 ";
- default:
- errs() << "Type = " << *Ty << '\n';
- llvm_unreachable("Invalid primitive type");
- }
- return ""; // Not reached
-}
-
-
-std::string MSILWriter::getTypeName(const Type* Ty, bool isSigned,
- bool isNested) {
- if (Ty->isPrimitiveType() || Ty->isIntegerTy())
- return getPrimitiveTypeName(Ty,isSigned);
- // FIXME: "OpaqueType" support
- switch (Ty->getTypeID()) {
- case Type::PointerTyID:
- return "void* ";
- case Type::StructTyID:
- if (isNested)
- return ModulePtr->getTypeName(Ty);
- return "valuetype '"+ModulePtr->getTypeName(Ty)+"' ";
- case Type::ArrayTyID:
- if (isNested)
- return getArrayTypeName(Ty->getTypeID(),Ty);
- return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
- case Type::VectorTyID:
- if (isNested)
- return getArrayTypeName(Ty->getTypeID(),Ty);
- return "valuetype '"+getArrayTypeName(Ty->getTypeID(),Ty)+"' ";
- default:
- errs() << "Type = " << *Ty << '\n';
- llvm_unreachable("Invalid type in getTypeName()");
- }
- return ""; // Not reached
-}
-
-
-MSILWriter::ValueType MSILWriter::getValueLocation(const Value* V) {
- // Function argument
- if (isa<Argument>(V))
- return ArgumentVT;
- // Function
- else if (const Function* F = dyn_cast<Function>(V))
- return F->hasLocalLinkage() ? InternalVT : GlobalVT;
- // Variable
- else if (const GlobalVariable* G = dyn_cast<GlobalVariable>(V))
- return G->hasLocalLinkage() ? InternalVT : GlobalVT;
- // Constant
- else if (isa<Constant>(V))
- return isa<ConstantExpr>(V) ? ConstExprVT : ConstVT;
- // Local variable
- return LocalVT;
-}
-
-
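-// Rough examples of the postfixes produced below: i1 -> "i1", unsigned
-// i16 -> "u2", signed i32 -> "i4", float -> "r4", double -> "r8"; with
-// Expand set, any integer is widened to "i4" or "i8" for stack operations.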
-std::string MSILWriter::getTypePostfix(const Type* Ty, bool Expand,
- bool isSigned) {
- unsigned NumBits = 0;
- switch (Ty->getTypeID()) {
- // Integer constant, expanding for stack operations.
- case Type::IntegerTyID:
- NumBits = getBitWidth(Ty);
- // Expand integer value to "int32" or "int64".
- if (Expand) return (NumBits<=32 ? "i4" : "i8");
- if (NumBits==1) return "i1";
- return (isSigned ? "i" : "u")+utostr(NumBits/8);
- // Float constant.
- case Type::FloatTyID:
- return "r4";
- case Type::DoubleTyID:
- return "r8";
- case Type::PointerTyID:
- return "i"+utostr(TD->getTypeAllocSize(Ty));
- default:
- errs() << "TypeID = " << Ty->getTypeID() << '\n';
- llvm_unreachable("Invalid type in TypeToPostfix()");
- }
- return ""; // Not reached
-}
-
-
-void MSILWriter::printConvToPtr() {
- switch (ModulePtr->getPointerSize()) {
- case Module::Pointer32:
- printSimpleInstruction("conv.u4");
- break;
- case Module::Pointer64:
- printSimpleInstruction("conv.u8");
- break;
- default:
-    llvm_unreachable("Module uses an unsupported pointer size");
- }
-}
-
-
-void MSILWriter::printPtrLoad(uint64_t N) {
- switch (ModulePtr->getPointerSize()) {
- case Module::Pointer32:
- printSimpleInstruction("ldc.i4",utostr(N).c_str());
- // FIXME: Need overflow test?
- if (!isUInt<32>(N)) {
- errs() << "Value = " << utostr(N) << '\n';
- llvm_unreachable("32-bit pointer overflowed");
- }
- break;
- case Module::Pointer64:
- printSimpleInstruction("ldc.i8",utostr(N).c_str());
- break;
- default:
-    llvm_unreachable("Module uses an unsupported pointer size");
- }
-}
-
-
-void MSILWriter::printValuePtrLoad(const Value* V) {
- printValueLoad(V);
- printConvToPtr();
-}
-
-
-void MSILWriter::printConstLoad(const Constant* C) {
- if (const ConstantInt* CInt = dyn_cast<ConstantInt>(C)) {
- // Integer constant
- Out << "\tldc." << getTypePostfix(C->getType(),true) << '\t';
- if (CInt->isMinValue(true))
- Out << CInt->getSExtValue();
- else
- Out << CInt->getZExtValue();
- } else if (const ConstantFP* FP = dyn_cast<ConstantFP>(C)) {
- // Float constant
- uint64_t X;
- unsigned Size;
- if (FP->getType()->getTypeID()==Type::FloatTyID) {
- X = (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue();
- Size = 4;
- } else {
- X = FP->getValueAPF().bitcastToAPInt().getZExtValue();
- Size = 8;
- }
- Out << "\tldc.r" << Size << "\t( " << utohexstr(X) << ')';
- } else if (isa<UndefValue>(C)) {
-    // An undefined constant is loaded as a null pointer.
- printPtrLoad(0);
- } else {
- errs() << "Constant = " << *C << '\n';
- llvm_unreachable("Invalid constant value");
- }
- Out << '\n';
-}
-
-
-void MSILWriter::printValueLoad(const Value* V) {
- MSILWriter::ValueType Location = getValueLocation(V);
- switch (Location) {
- // Global variable or function address.
- case GlobalVT:
- case InternalVT:
- if (const Function* F = dyn_cast<Function>(V)) {
- std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
- printSimpleInstruction("ldftn",
- getCallSignature(F->getFunctionType(),NULL,Name).c_str());
- } else {
- std::string Tmp;
- const Type* ElemTy = cast<PointerType>(V->getType())->getElementType();
- if (Location==GlobalVT && cast<GlobalVariable>(V)->hasDLLImportLinkage()) {
- Tmp = "void* "+getValueName(V);
- printSimpleInstruction("ldsfld",Tmp.c_str());
- } else {
- Tmp = getTypeName(ElemTy)+getValueName(V);
- printSimpleInstruction("ldsflda",Tmp.c_str());
- }
- }
- break;
- // Function argument.
- case ArgumentVT:
- printSimpleInstruction("ldarg",getValueName(V).c_str());
- break;
- // Local function variable.
- case LocalVT:
- printSimpleInstruction("ldloc",getValueName(V).c_str());
- break;
- // Constant value.
- case ConstVT:
- if (isa<ConstantPointerNull>(V))
- printPtrLoad(0);
- else
- printConstLoad(cast<Constant>(V));
- break;
- // Constant expression.
- case ConstExprVT:
- printConstantExpr(cast<ConstantExpr>(V));
- break;
- default:
- errs() << "Value = " << *V << '\n';
- llvm_unreachable("Invalid value location");
- }
-}
-
-
-void MSILWriter::printValueSave(const Value* V) {
- switch (getValueLocation(V)) {
- case ArgumentVT:
- printSimpleInstruction("starg",getValueName(V).c_str());
- break;
- case LocalVT:
- printSimpleInstruction("stloc",getValueName(V).c_str());
- break;
- default:
- errs() << "Value = " << *V << '\n';
- llvm_unreachable("Invalid value location");
- }
-}
-
-
-void MSILWriter::printBinaryInstruction(const char* Name, const Value* Left,
- const Value* Right) {
- printValueLoad(Left);
- printValueLoad(Right);
- Out << '\t' << Name << '\n';
-}
-
-
-void MSILWriter::printSimpleInstruction(const char* Inst, const char* Operand) {
- if(Operand)
- Out << '\t' << Inst << '\t' << Operand << '\n';
- else
- Out << '\t' << Inst << '\n';
-}
-
-
-void MSILWriter::printPHICopy(const BasicBlock* Src, const BasicBlock* Dst) {
- for (BasicBlock::const_iterator I = Dst->begin(); isa<PHINode>(I); ++I) {
- const PHINode* Phi = cast<PHINode>(I);
- const Value* Val = Phi->getIncomingValueForBlock(Src);
- if (isa<UndefValue>(Val)) continue;
- printValueLoad(Val);
- printValueSave(Phi);
- }
-}
-
-
-void MSILWriter::printBranchToBlock(const BasicBlock* CurrBB,
- const BasicBlock* TrueBB,
- const BasicBlock* FalseBB) {
- if (TrueBB==FalseBB) {
-    // "TrueBB" and "FalseBB" are the same destination.
- printPHICopy(CurrBB,TrueBB);
- printSimpleInstruction("pop");
- printSimpleInstruction("br",getLabelName(TrueBB).c_str());
- } else if (FalseBB==NULL) {
-    // If "FalseBB" is not used, the jump is conditional.
- printPHICopy(CurrBB,TrueBB);
- printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
- } else if (TrueBB==NULL) {
-    // If "TrueBB" is not used, the jump is unconditional.
- printPHICopy(CurrBB,FalseBB);
- printSimpleInstruction("br",getLabelName(FalseBB).c_str());
- } else {
- // Copy PHI instructions for each block
- std::string TmpLabel;
- // Print PHI instructions for "TrueBB"
- if (isa<PHINode>(TrueBB->begin())) {
- TmpLabel = getLabelName(TrueBB)+"$phi_"+utostr(getUniqID());
- printSimpleInstruction("brtrue",TmpLabel.c_str());
- } else {
- printSimpleInstruction("brtrue",getLabelName(TrueBB).c_str());
- }
-    // Print PHI instructions for "FalseBB"
-    if (isa<PHINode>(FalseBB->begin()))
-      printPHICopy(CurrBB,FalseBB);
-    printSimpleInstruction("br",getLabelName(FalseBB).c_str());
- if (isa<PHINode>(TrueBB->begin())) {
- // Handle "TrueBB" PHI Copy
- Out << TmpLabel << ":\n";
- printPHICopy(CurrBB,TrueBB);
- printSimpleInstruction("br",getLabelName(TrueBB).c_str());
- }
- }
-}
-
-
-void MSILWriter::printBranchInstruction(const BranchInst* Inst) {
- if (Inst->isUnconditional()) {
- printBranchToBlock(Inst->getParent(),NULL,Inst->getSuccessor(0));
- } else {
- printValueLoad(Inst->getCondition());
- printBranchToBlock(Inst->getParent(),Inst->getSuccessor(0),
- Inst->getSuccessor(1));
- }
-}
-
-
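-// Emitted IL sketch for "select Cond, VTrue, VFalse" (label is per-instance):
-//   <load VTrue>  <load Cond>  brtrue select$true_N
-//   pop  <load VFalse>
-// select$true_N: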
-void MSILWriter::printSelectInstruction(const Value* Cond, const Value* VTrue,
- const Value* VFalse) {
- std::string TmpLabel = std::string("select$true_")+utostr(getUniqID());
- printValueLoad(VTrue);
- printValueLoad(Cond);
- printSimpleInstruction("brtrue",TmpLabel.c_str());
- printSimpleInstruction("pop");
- printValueLoad(VFalse);
- Out << TmpLabel << ":\n";
-}
-
-
-void MSILWriter::printIndirectLoad(const Value* V) {
- const Type* Ty = V->getType();
- printValueLoad(V);
- if (const PointerType* P = dyn_cast<PointerType>(Ty))
- Ty = P->getElementType();
- std::string Tmp = "ldind."+getTypePostfix(Ty, false);
- printSimpleInstruction(Tmp.c_str());
-}
-
-
-void MSILWriter::printIndirectSave(const Value* Ptr, const Value* Val) {
- printValueLoad(Ptr);
- printValueLoad(Val);
- printIndirectSave(Val->getType());
-}
-
-
-void MSILWriter::printIndirectSave(const Type* Ty) {
-  // The store instruction needs a signed postfix for any type.
- std::string postfix = getTypePostfix(Ty, false);
- if (*postfix.begin()=='u') *postfix.begin() = 'i';
- postfix = "stind."+postfix;
- printSimpleInstruction(postfix.c_str());
-}
-
-
-void MSILWriter::printCastInstruction(unsigned int Op, const Value* V,
- const Type* Ty, const Type* SrcTy) {
- std::string Tmp("");
- printValueLoad(V);
- switch (Op) {
- // Signed
- case Instruction::SExt:
-    // If sign-extending an integer, first convert it from unsigned to signed
-    // at the same bit width - otherwise we would lose the sign.
- if (SrcTy) {
- Tmp = "conv."+getTypePostfix(SrcTy,false,true);
- printSimpleInstruction(Tmp.c_str());
- }
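-    // For example, a sketch of sext i8 -> i32: "conv.i1" reinterprets the
-    // value as a signed byte, then the fallthrough below emits "conv.i4".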
- // FALLTHROUGH
- case Instruction::SIToFP:
- case Instruction::FPToSI:
- Tmp = "conv."+getTypePostfix(Ty,false,true);
- printSimpleInstruction(Tmp.c_str());
- break;
- // Unsigned
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::FPToUI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- Tmp = "conv."+getTypePostfix(Ty,false);
- printSimpleInstruction(Tmp.c_str());
- break;
- // Do nothing
- case Instruction::BitCast:
-    // FIXME: assumes that ld*/st* instructions do not change the data format.
- break;
- default:
- errs() << "Opcode = " << Op << '\n';
- llvm_unreachable("Invalid conversion instruction");
- }
-}
-
-
-void MSILWriter::printGepInstruction(const Value* V, gep_type_iterator I,
- gep_type_iterator E) {
- unsigned Size;
- // Load address
- printValuePtrLoad(V);
- // Calculate element offset.
- for (; I!=E; ++I){
- Size = 0;
- const Value* IndexValue = I.getOperand();
- if (const StructType* StrucTy = dyn_cast<StructType>(*I)) {
- uint64_t FieldIndex = cast<ConstantInt>(IndexValue)->getZExtValue();
-      // The offset is the sum of the sizes of all preceding structure fields.
- for (uint64_t F = 0; F<FieldIndex; ++F)
- Size += TD->getTypeAllocSize(StrucTy->getContainedType((unsigned)F));
- printPtrLoad(Size);
- printSimpleInstruction("add");
- continue;
- } else if (const SequentialType* SeqTy = dyn_cast<SequentialType>(*I)) {
- Size = TD->getTypeAllocSize(SeqTy->getElementType());
- } else {
- Size = TD->getTypeAllocSize(*I);
- }
- // Add offset of current element to stack top.
- if (!isZeroValue(IndexValue)) {
- // Constant optimization.
- if (const ConstantInt* C = dyn_cast<ConstantInt>(IndexValue)) {
- if (C->getValue().isNegative()) {
- printPtrLoad(C->getValue().abs().getZExtValue()*Size);
- printSimpleInstruction("sub");
- continue;
- } else
- printPtrLoad(C->getZExtValue()*Size);
- } else {
- printPtrLoad(Size);
- printValuePtrLoad(IndexValue);
- printSimpleInstruction("mul");
- }
- printSimpleInstruction("add");
- }
- }
-}
-
-
-std::string MSILWriter::getCallSignature(const FunctionType* Ty,
- const Instruction* Inst,
- std::string Name) {
- std::string Tmp("");
- if (Ty->isVarArg()) Tmp += "vararg ";
- // Name and return type.
- Tmp += getTypeName(Ty->getReturnType())+Name+"(";
- // Function argument type list.
- unsigned NumParams = Ty->getNumParams();
- for (unsigned I = 0; I!=NumParams; ++I) {
- if (I!=0) Tmp += ",";
- Tmp += getTypeName(Ty->getParamType(I));
- }
-  // The CLR needs to know the exact number of parameters a vararg function
-  // receives, because the caller cleans up the stack.
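-  // Rough sketch: a call through "int printf(i8*, ...)" passing two extra
-  // operands (i32, double) appends roughly "... , unsigned int32 , float64 "
-  // after the fixed "void* " parameter, so the CLR can pop the right number
-  // of stack slots.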
- if (Ty->isVarArg() && Inst) {
-    // Index of the first argument operand in "CallInst" or "InvokeInst".
- unsigned Org = isa<InvokeInst>(Inst) ? 3 : 1;
- // Print variable argument types.
- unsigned NumOperands = Inst->getNumOperands()-Org;
- if (NumParams<NumOperands) {
- if (NumParams!=0) Tmp += ", ";
- Tmp += "... , ";
- for (unsigned J = NumParams; J!=NumOperands; ++J) {
- if (J!=NumParams) Tmp += ", ";
- Tmp += getTypeName(Inst->getOperand(J+Org)->getType());
- }
- }
- }
- return Tmp+")";
-}
-
-
-void MSILWriter::printFunctionCall(const Value* FnVal,
- const Instruction* Inst) {
- // Get function calling convention.
- std::string Name = "";
- if (const CallInst* Call = dyn_cast<CallInst>(Inst))
- Name = getConvModopt(Call->getCallingConv());
- else if (const InvokeInst* Invoke = dyn_cast<InvokeInst>(Inst))
- Name = getConvModopt(Invoke->getCallingConv());
- else {
- errs() << "Instruction = " << Inst->getName() << '\n';
-    llvm_unreachable("Expected a \"Call\" or \"Invoke\" instruction");
- }
- if (const Function* F = dyn_cast<Function>(FnVal)) {
- // Direct call.
- Name += getValueName(F);
- printSimpleInstruction("call",
- getCallSignature(F->getFunctionType(),Inst,Name).c_str());
- } else {
- // Indirect function call.
- const PointerType* PTy = cast<PointerType>(FnVal->getType());
- const FunctionType* FTy = cast<FunctionType>(PTy->getElementType());
- // Load function address.
- printValueLoad(FnVal);
- printSimpleInstruction("calli",getCallSignature(FTy,Inst,Name).c_str());
- }
-}
-
-
-void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
- std::string Name;
- switch (Inst->getIntrinsicID()) {
- case Intrinsic::vastart:
- Name = getValueName(Inst->getArgOperand(0));
- Name.insert(Name.length()-1,"$valist");
- // Obtain the argument handle.
- printSimpleInstruction("ldloca",Name.c_str());
- printSimpleInstruction("arglist");
- printSimpleInstruction("call",
- "instance void [mscorlib]System.ArgIterator::.ctor"
- "(valuetype [mscorlib]System.RuntimeArgumentHandle)");
- // Save as pointer type "void*"
- printValueLoad(Inst->getArgOperand(0));
- printSimpleInstruction("ldloca",Name.c_str());
- printIndirectSave(PointerType::getUnqual(
- IntegerType::get(Inst->getContext(), 8)));
- break;
- case Intrinsic::vaend:
- // Close argument list handle.
- printIndirectLoad(Inst->getArgOperand(0));
- printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()");
- break;
- case Intrinsic::vacopy:
- // Copy "ArgIterator" valuetype.
- printIndirectLoad(Inst->getArgOperand(0));
- printIndirectLoad(Inst->getArgOperand(1));
- printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
- break;
- default:
- errs() << "Intrinsic ID = " << Inst->getIntrinsicID() << '\n';
- llvm_unreachable("Invalid intrinsic function");
- }
-}
-
-
-void MSILWriter::printCallInstruction(const Instruction* Inst) {
- if (isa<IntrinsicInst>(Inst)) {
- // Handle intrinsic function.
- printIntrinsicCall(cast<IntrinsicInst>(Inst));
- } else {
- const CallInst *CI = cast<CallInst>(Inst);
- // Load arguments to stack and call function.
-    for (unsigned I = 0, E = CI->getNumArgOperands(); I!=E; ++I)
- printValueLoad(CI->getArgOperand(I));
- printFunctionCall(CI->getCalledFunction(), Inst);
- }
-}
-
-
-void MSILWriter::printICmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right) {
- switch (Predicate) {
- case ICmpInst::ICMP_EQ:
- printBinaryInstruction("ceq",Left,Right);
- break;
- case ICmpInst::ICMP_NE:
- // Emulate = not neg (Op1 eq Op2)
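-    // "ceq" pushes 0 or 1; "neg" then "not" maps 1 to 0 and 0 to -1 (all
-    // bits set), which serves as a logical negation here.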
- printBinaryInstruction("ceq",Left,Right);
- printSimpleInstruction("neg");
- printSimpleInstruction("not");
- break;
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE:
- // Emulate = (Op1 eq Op2) or (Op1 lt Op2)
- printBinaryInstruction("ceq",Left,Right);
- if (Predicate==ICmpInst::ICMP_ULE)
- printBinaryInstruction("clt.un",Left,Right);
- else
- printBinaryInstruction("clt",Left,Right);
- printSimpleInstruction("or");
- break;
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE:
- // Emulate = (Op1 eq Op2) or (Op1 gt Op2)
- printBinaryInstruction("ceq",Left,Right);
- if (Predicate==ICmpInst::ICMP_UGE)
- printBinaryInstruction("cgt.un",Left,Right);
- else
- printBinaryInstruction("cgt",Left,Right);
- printSimpleInstruction("or");
- break;
- case ICmpInst::ICMP_ULT:
- printBinaryInstruction("clt.un",Left,Right);
- break;
- case ICmpInst::ICMP_SLT:
- printBinaryInstruction("clt",Left,Right);
- break;
- case ICmpInst::ICMP_UGT:
- printBinaryInstruction("cgt.un",Left,Right);
- break;
- case ICmpInst::ICMP_SGT:
- printBinaryInstruction("cgt",Left,Right);
- break;
- default:
- errs() << "Predicate = " << Predicate << '\n';
- llvm_unreachable("Invalid icmp predicate");
- }
-}
-
-
-void MSILWriter::printFCmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right) {
- // FIXME: Correct comparison
- std::string NanFunc = "bool [mscorlib]System.Double::IsNaN(float64)";
- switch (Predicate) {
- case FCmpInst::FCMP_UGT:
- // X > Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("cgt",Left,Right);
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OGT:
- // X > Y
- printBinaryInstruction("cgt",Left,Right);
- break;
- case FCmpInst::FCMP_UGE:
- // X >= Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("cgt",Left,Right);
- printSimpleInstruction("or");
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OGE:
- // X >= Y
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("cgt",Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_ULT:
- // X < Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("clt",Left,Right);
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OLT:
- // X < Y
- printBinaryInstruction("clt",Left,Right);
- break;
- case FCmpInst::FCMP_ULE:
- // X <= Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("clt",Left,Right);
- printSimpleInstruction("or");
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OLE:
- // X <= Y
- printBinaryInstruction("ceq",Left,Right);
- printBinaryInstruction("clt",Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_UEQ:
- // X == Y || llvm_fcmp_uno(X, Y)
- printBinaryInstruction("ceq",Left,Right);
- printFCmpInstruction(FCmpInst::FCMP_UNO,Left,Right);
- printSimpleInstruction("or");
- break;
- case FCmpInst::FCMP_OEQ:
- // X == Y
- printBinaryInstruction("ceq",Left,Right);
- break;
- case FCmpInst::FCMP_UNE:
- // X != Y
- printBinaryInstruction("ceq",Left,Right);
- printSimpleInstruction("neg");
- printSimpleInstruction("not");
- break;
- case FCmpInst::FCMP_ONE:
- // X != Y && llvm_fcmp_ord(X, Y)
-    printBinaryInstruction("ceq",Left,Right);
-    printSimpleInstruction("neg");
-    printSimpleInstruction("not");
-    printFCmpInstruction(FCmpInst::FCMP_ORD,Left,Right);
-    printSimpleInstruction("and");
-    break;
- case FCmpInst::FCMP_ORD:
- // return X == X && Y == Y
- printBinaryInstruction("ceq",Left,Left);
- printBinaryInstruction("ceq",Right,Right);
-    printSimpleInstruction("and");
- break;
- case FCmpInst::FCMP_UNO:
- // X != X || Y != Y
-    printBinaryInstruction("ceq",Left,Left);
-    printSimpleInstruction("neg");
-    printSimpleInstruction("not");
-    printBinaryInstruction("ceq",Right,Right);
-    printSimpleInstruction("neg");
-    printSimpleInstruction("not");
-    printSimpleInstruction("or");
- break;
- default:
- llvm_unreachable("Illegal FCmp predicate");
- }
-}
-
-
-void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) {
- std::string Label = "leave$normal_"+utostr(getUniqID());
- Out << ".try {\n";
- // Load arguments
-  for (unsigned I = 0, E = Inst->getNumArgOperands(); I!=E; ++I)
- printValueLoad(Inst->getArgOperand(I));
- // Print call instruction
- printFunctionCall(Inst->getOperand(0),Inst);
- // Save function result and leave "try" block
- printValueSave(Inst);
- printSimpleInstruction("leave",Label.c_str());
- Out << "}\n";
- Out << "catch [mscorlib]System.Exception {\n";
- // Redirect to unwind block
- printSimpleInstruction("pop");
- printBranchToBlock(Inst->getParent(),NULL,Inst->getUnwindDest());
- Out << "}\n" << Label << ":\n";
- // Redirect to continue block
- printBranchToBlock(Inst->getParent(),NULL,Inst->getNormalDest());
-}
-
-
-void MSILWriter::printSwitchInstruction(const SwitchInst* Inst) {
- // FIXME: Emulate with IL "switch" instruction
- // Emulate = if () else if () else if () else ...
- for (unsigned int I = 1, E = Inst->getNumCases(); I!=E; ++I) {
- printValueLoad(Inst->getCondition());
- printValueLoad(Inst->getCaseValue(I));
- printSimpleInstruction("ceq");
- // Condition jump to successor block
- printBranchToBlock(Inst->getParent(),Inst->getSuccessor(I),NULL);
- }
- // Jump to default block
- printBranchToBlock(Inst->getParent(),NULL,Inst->getDefaultDest());
-}
-
-
-void MSILWriter::printVAArgInstruction(const VAArgInst* Inst) {
- printIndirectLoad(Inst->getOperand(0));
- printSimpleInstruction("call",
- "instance typedref [mscorlib]System.ArgIterator::GetNextArg()");
- printSimpleInstruction("refanyval","void*");
- std::string Name =
- "ldind."+getTypePostfix(PointerType::getUnqual(
- IntegerType::get(Inst->getContext(), 8)),false);
- printSimpleInstruction(Name.c_str());
-}
-
-
-void MSILWriter::printAllocaInstruction(const AllocaInst* Inst) {
- uint64_t Size = TD->getTypeAllocSize(Inst->getAllocatedType());
- // Constant optimization.
- if (const ConstantInt* CInt = dyn_cast<ConstantInt>(Inst->getOperand(0))) {
- printPtrLoad(CInt->getZExtValue()*Size);
- } else {
- printPtrLoad(Size);
- printValueLoad(Inst->getOperand(0));
- printSimpleInstruction("mul");
- }
- printSimpleInstruction("localloc");
-}
-
-
-void MSILWriter::printInstruction(const Instruction* Inst) {
- const Value *Left = 0, *Right = 0;
- if (Inst->getNumOperands()>=1) Left = Inst->getOperand(0);
- if (Inst->getNumOperands()>=2) Right = Inst->getOperand(1);
- // Print instruction
- // FIXME: "ShuffleVector","ExtractElement","InsertElement" support.
- switch (Inst->getOpcode()) {
- // Terminator
-  case Instruction::Ret:
-    if (Inst->getNumOperands())
-      printValueLoad(Left);
-    printSimpleInstruction("ret");
-    break;
- case Instruction::Br:
- printBranchInstruction(cast<BranchInst>(Inst));
- break;
- // Binary
- case Instruction::Add:
- case Instruction::FAdd:
- printBinaryInstruction("add",Left,Right);
- break;
- case Instruction::Sub:
- case Instruction::FSub:
- printBinaryInstruction("sub",Left,Right);
- break;
- case Instruction::Mul:
- case Instruction::FMul:
- printBinaryInstruction("mul",Left,Right);
- break;
- case Instruction::UDiv:
- printBinaryInstruction("div.un",Left,Right);
- break;
- case Instruction::SDiv:
- case Instruction::FDiv:
- printBinaryInstruction("div",Left,Right);
- break;
- case Instruction::URem:
- printBinaryInstruction("rem.un",Left,Right);
- break;
- case Instruction::SRem:
- case Instruction::FRem:
- printBinaryInstruction("rem",Left,Right);
- break;
- // Binary Condition
- case Instruction::ICmp:
- printICmpInstruction(cast<ICmpInst>(Inst)->getPredicate(),Left,Right);
- break;
- case Instruction::FCmp:
- printFCmpInstruction(cast<FCmpInst>(Inst)->getPredicate(),Left,Right);
- break;
- // Bitwise Binary
- case Instruction::And:
- printBinaryInstruction("and",Left,Right);
- break;
- case Instruction::Or:
- printBinaryInstruction("or",Left,Right);
- break;
- case Instruction::Xor:
- printBinaryInstruction("xor",Left,Right);
- break;
- case Instruction::Shl:
- printValueLoad(Left);
- printValueLoad(Right);
- printSimpleInstruction("conv.i4");
- printSimpleInstruction("shl");
- break;
- case Instruction::LShr:
- printValueLoad(Left);
- printValueLoad(Right);
- printSimpleInstruction("conv.i4");
- printSimpleInstruction("shr.un");
- break;
- case Instruction::AShr:
- printValueLoad(Left);
- printValueLoad(Right);
- printSimpleInstruction("conv.i4");
- printSimpleInstruction("shr");
- break;
- case Instruction::Select:
- printSelectInstruction(Inst->getOperand(0),Inst->getOperand(1),Inst->getOperand(2));
- break;
- case Instruction::Load:
- printIndirectLoad(Inst->getOperand(0));
- break;
- case Instruction::Store:
- printIndirectSave(Inst->getOperand(1), Inst->getOperand(0));
- break;
- case Instruction::SExt:
- printCastInstruction(Inst->getOpcode(),Left,
- cast<CastInst>(Inst)->getDestTy(),
- cast<CastInst>(Inst)->getSrcTy());
- break;
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- printCastInstruction(Inst->getOpcode(),Left,
- cast<CastInst>(Inst)->getDestTy());
- break;
- case Instruction::GetElementPtr:
- printGepInstruction(Inst->getOperand(0),gep_type_begin(Inst),
- gep_type_end(Inst));
- break;
- case Instruction::Call:
- printCallInstruction(cast<CallInst>(Inst));
- break;
- case Instruction::Invoke:
- printInvokeInstruction(cast<InvokeInst>(Inst));
- break;
- case Instruction::Unwind:
- printSimpleInstruction("newobj",
- "instance void [mscorlib]System.Exception::.ctor()");
- printSimpleInstruction("throw");
- break;
- case Instruction::Switch:
- printSwitchInstruction(cast<SwitchInst>(Inst));
- break;
- case Instruction::Alloca:
- printAllocaInstruction(cast<AllocaInst>(Inst));
- break;
- case Instruction::Unreachable:
- printSimpleInstruction("ldstr", "\"Unreachable instruction\"");
- printSimpleInstruction("newobj",
- "instance void [mscorlib]System.Exception::.ctor(string)");
- printSimpleInstruction("throw");
- break;
- case Instruction::VAArg:
- printVAArgInstruction(cast<VAArgInst>(Inst));
- break;
- default:
- errs() << "Instruction = " << Inst->getName() << '\n';
- llvm_unreachable("Unsupported instruction");
- }
-}
-
-
-void MSILWriter::printLoop(const Loop* L) {
- Out << getLabelName(L->getHeader()->getName()) << ":\n";
- const std::vector<BasicBlock*>& blocks = L->getBlocks();
- for (unsigned I = 0, E = blocks.size(); I!=E; I++) {
- BasicBlock* BB = blocks[I];
- Loop* BBLoop = LInfo->getLoopFor(BB);
- if (BBLoop == L)
- printBasicBlock(BB);
- else if (BB==BBLoop->getHeader() && BBLoop->getParentLoop()==L)
- printLoop(BBLoop);
- }
- printSimpleInstruction("br",getLabelName(L->getHeader()->getName()).c_str());
-}
-
-
-void MSILWriter::printBasicBlock(const BasicBlock* BB) {
- Out << getLabelName(BB) << ":\n";
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
- const Instruction* Inst = I;
-    // Uncomment to emit the original LLVM instruction as an IL comment:
-    // Out << "\n//" << *Inst << "\n";
-    // PHI nodes are not handled here; their copies are emitted at branches.
- if (Inst->getOpcode()==Instruction::PHI) continue;
- // Print instruction
- printInstruction(Inst);
- // Save result
- if (Inst->getType()!=Type::getVoidTy(BB->getContext())) {
-      // Do not save the value after an invoke; that is done in the "try" block.
- if (Inst->getOpcode()==Instruction::Invoke) continue;
- printValueSave(Inst);
- }
- }
-}
-
-
-void MSILWriter::printLocalVariables(const Function& F) {
- std::string Name;
- const Type* Ty = NULL;
- std::set<const Value*> Printed;
- const Value* VaList = NULL;
- unsigned StackDepth = 8;
- // Find local variables
- for (const_inst_iterator I = inst_begin(&F), E = inst_end(&F); I!=E; ++I) {
- if (I->getOpcode()==Instruction::Call ||
- I->getOpcode()==Instruction::Invoke) {
- // Test stack depth.
- if (StackDepth<I->getNumOperands())
- StackDepth = I->getNumOperands();
- }
- const AllocaInst* AI = dyn_cast<AllocaInst>(&*I);
- if (AI && !isa<GlobalVariable>(AI)) {
- // Local variable allocation.
- Ty = PointerType::getUnqual(AI->getAllocatedType());
- Name = getValueName(AI);
- Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
- } else if (I->getType()!=Type::getVoidTy(F.getContext())) {
- // Operation result.
- Ty = I->getType();
- Name = getValueName(&*I);
- Out << "\t.locals (" << getTypeName(Ty) << Name << ")\n";
- }
-    // Check whether the instruction involves a 'va_list' variable.
- bool isVaList = false;
- if (const VAArgInst* VaInst = dyn_cast<VAArgInst>(&*I)) {
- // "va_list" as "va_arg" instruction operand.
- isVaList = true;
- VaList = VaInst->getOperand(0);
- } else if (const IntrinsicInst* Inst = dyn_cast<IntrinsicInst>(&*I)) {
- // "va_list" as intrinsic function operand.
- switch (Inst->getIntrinsicID()) {
- case Intrinsic::vastart:
- case Intrinsic::vaend:
- case Intrinsic::vacopy:
- isVaList = true;
- VaList = Inst->getArgOperand(0);
- break;
- default:
- isVaList = false;
- }
- }
- // Print "va_list" variable.
- if (isVaList && Printed.insert(VaList).second) {
- Name = getValueName(VaList);
- Name.insert(Name.length()-1,"$valist");
- Out << "\t.locals (valuetype [mscorlib]System.ArgIterator "
- << Name << ")\n";
- }
- }
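-  // A deliberately conservative estimate: twice the operand count of the
-  // widest call seen above, and never fewer than 16 slots.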
- printSimpleInstruction(".maxstack",utostr(StackDepth*2).c_str());
-}
-
-
-void MSILWriter::printFunctionBody(const Function& F) {
- // Print body
- for (Function::const_iterator I = F.begin(), E = F.end(); I!=E; ++I) {
- if (Loop *L = LInfo->getLoopFor(I)) {
- if (L->getHeader()==I && L->getParentLoop()==0)
- printLoop(L);
- } else {
- printBasicBlock(I);
- }
- }
-}
-
-
-void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
- const Value *left = 0, *right = 0;
- if (CE->getNumOperands()>=1) left = CE->getOperand(0);
- if (CE->getNumOperands()>=2) right = CE->getOperand(1);
- // Print instruction
- switch (CE->getOpcode()) {
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast:
- printCastInstruction(CE->getOpcode(),left,CE->getType());
- break;
- case Instruction::GetElementPtr:
- printGepInstruction(CE->getOperand(0),gep_type_begin(CE),gep_type_end(CE));
- break;
- case Instruction::ICmp:
- printICmpInstruction(CE->getPredicate(),left,right);
- break;
- case Instruction::FCmp:
- printFCmpInstruction(CE->getPredicate(),left,right);
- break;
- case Instruction::Select:
- printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2));
- break;
- case Instruction::Add:
- case Instruction::FAdd:
- printBinaryInstruction("add",left,right);
- break;
- case Instruction::Sub:
- case Instruction::FSub:
- printBinaryInstruction("sub",left,right);
- break;
- case Instruction::Mul:
- case Instruction::FMul:
- printBinaryInstruction("mul",left,right);
- break;
- case Instruction::UDiv:
- printBinaryInstruction("div.un",left,right);
- break;
- case Instruction::SDiv:
- case Instruction::FDiv:
- printBinaryInstruction("div",left,right);
- break;
- case Instruction::URem:
- printBinaryInstruction("rem.un",left,right);
- break;
- case Instruction::SRem:
- case Instruction::FRem:
- printBinaryInstruction("rem",left,right);
- break;
- case Instruction::And:
- printBinaryInstruction("and",left,right);
- break;
- case Instruction::Or:
- printBinaryInstruction("or",left,right);
- break;
- case Instruction::Xor:
- printBinaryInstruction("xor",left,right);
- break;
- case Instruction::Shl:
- printBinaryInstruction("shl",left,right);
- break;
- case Instruction::LShr:
- printBinaryInstruction("shr.un",left,right);
- break;
- case Instruction::AShr:
- printBinaryInstruction("shr",left,right);
- break;
- default:
- errs() << "Expression = " << *CE << "\n";
- llvm_unreachable("Invalid constant expression");
- }
-}
-
-
-void MSILWriter::printStaticInitializerList() {
- // List of global variables with uninitialized fields.
- for (std::map<const GlobalVariable*,std::vector<StaticInitializer> >::iterator
- VarI = StaticInitList.begin(), VarE = StaticInitList.end(); VarI!=VarE;
- ++VarI) {
- const std::vector<StaticInitializer>& InitList = VarI->second;
- if (InitList.empty()) continue;
- // For each uninitialized field.
- for (std::vector<StaticInitializer>::const_iterator I = InitList.begin(),
- E = InitList.end(); I!=E; ++I) {
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(I->constant)) {
- // Out << "\n// Init " << getValueName(VarI->first) << ", offset " <<
- // utostr(I->offset) << ", type "<< *I->constant->getType() << "\n\n";
- // Load variable address
- printValueLoad(VarI->first);
- // Add offset
- if (I->offset!=0) {
- printPtrLoad(I->offset);
- printSimpleInstruction("add");
- }
- // Load value
- printConstantExpr(CE);
- // Save result at offset
- std::string postfix = getTypePostfix(CE->getType(),true);
- if (*postfix.begin()=='u') *postfix.begin() = 'i';
- postfix = "stind."+postfix;
- printSimpleInstruction(postfix.c_str());
- } else {
- errs() << "Constant = " << *I->constant << '\n';
- llvm_unreachable("Invalid static initializer");
- }
- }
- }
-}
-
-
-void MSILWriter::printFunction(const Function& F) {
- bool isSigned = F.paramHasAttr(0, Attribute::SExt);
- Out << "\n.method static ";
- Out << (F.hasLocalLinkage() ? "private " : "public ");
- if (F.isVarArg()) Out << "vararg ";
- Out << getTypeName(F.getReturnType(),isSigned) <<
- getConvModopt(F.getCallingConv()) << getValueName(&F) << '\n';
- // Arguments
- Out << "\t(";
- unsigned ArgIdx = 1;
- for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I!=E;
- ++I, ++ArgIdx) {
- isSigned = F.paramHasAttr(ArgIdx, Attribute::SExt);
- if (I!=F.arg_begin()) Out << ", ";
- Out << getTypeName(I->getType(),isSigned) << getValueName(I);
- }
- Out << ") cil managed\n";
- // Body
- Out << "{\n";
- printLocalVariables(F);
- printFunctionBody(F);
- Out << "}\n";
-}
-
-
-void MSILWriter::printDeclarations(const TypeSymbolTable& ST) {
- std::string Name;
- std::set<const Type*> Printed;
- for (std::set<const Type*>::const_iterator
- UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) {
- const Type* Ty = *UI;
-    // Only array, vector, and struct types need a declaration.
-    if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy())
-      Name = getTypeName(Ty, false, true);
-    else
-      continue;
-    // Print each type only once.
- if (Printed.insert(Ty).second) {
- Out << ".class value explicit ansi sealed '" << Name << "'";
- Out << " { .pack " << 1 << " .size " << TD->getTypeAllocSize(Ty);
- Out << " }\n\n";
- }
- }
-}
-
-
-unsigned int MSILWriter::getBitWidth(const Type* Ty) {
- unsigned int N = Ty->getPrimitiveSizeInBits();
- assert(N!=0 && "Invalid type in getBitWidth()");
- switch (N) {
- case 1:
- case 8:
- case 16:
- case 32:
- case 64:
- return N;
- default:
- errs() << "Bits = " << N << '\n';
- llvm_unreachable("Unsupported integer width");
- }
- return 0; // Not reached
-}
-
-
-void MSILWriter::printStaticConstant(const Constant* C, uint64_t& Offset) {
- uint64_t TySize = 0;
- const Type* Ty = C->getType();
- // Print zero initialized constant.
- if (isa<ConstantAggregateZero>(C) || C->isNullValue()) {
- TySize = TD->getTypeAllocSize(C->getType());
- Offset += TySize;
- Out << "int8 (0) [" << TySize << "]";
- return;
- }
- // Print constant initializer
- switch (Ty->getTypeID()) {
- case Type::IntegerTyID: {
- TySize = TD->getTypeAllocSize(Ty);
- const ConstantInt* Int = cast<ConstantInt>(C);
- Out << getPrimitiveTypeName(Ty,true) << "(" << Int->getSExtValue() << ")";
- break;
- }
- case Type::FloatTyID:
- case Type::DoubleTyID: {
- TySize = TD->getTypeAllocSize(Ty);
- const ConstantFP* FP = cast<ConstantFP>(C);
- if (Ty->getTypeID() == Type::FloatTyID)
- Out << "int32 (" <<
- (uint32_t)FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
- else
- Out << "int64 (" <<
- FP->getValueAPF().bitcastToAPInt().getZExtValue() << ')';
- break;
- }
- case Type::ArrayTyID:
- case Type::VectorTyID:
- case Type::StructTyID:
- for (unsigned I = 0, E = C->getNumOperands(); I<E; I++) {
- if (I!=0) Out << ",\n";
- printStaticConstant(cast<Constant>(C->getOperand(I)), Offset);
- }
- break;
- case Type::PointerTyID:
- TySize = TD->getTypeAllocSize(C->getType());
- // Initialize with global variable address
- if (const GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
- std::string name = getValueName(G);
- Out << "&(" << name.insert(name.length()-1,"$data") << ")";
- } else {
- // Dynamic initialization
- if (!isa<ConstantPointerNull>(C) && !C->isNullValue())
- InitListPtr->push_back(StaticInitializer(C,Offset));
- // Null pointer initialization
- if (TySize==4) Out << "int32 (0)";
- else if (TySize==8) Out << "int64 (0)";
- else llvm_unreachable("Invalid pointer size");
- }
- break;
- default:
- errs() << "TypeID = " << Ty->getTypeID() << '\n';
- llvm_unreachable("Invalid type in printStaticConstant()");
- }
- // Increase offset.
- Offset += TySize;
-}
-
-
-void MSILWriter::printStaticInitializer(const Constant* C,
- const std::string& Name) {
- switch (C->getType()->getTypeID()) {
- case Type::IntegerTyID:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- Out << getPrimitiveTypeName(C->getType(), false);
- break;
- case Type::ArrayTyID:
- case Type::VectorTyID:
- case Type::StructTyID:
- case Type::PointerTyID:
- Out << getTypeName(C->getType());
- break;
- default:
- errs() << "Type = " << *C << "\n";
- llvm_unreachable("Invalid constant type");
- }
- // Print initializer
- std::string label = Name;
- label.insert(label.length()-1,"$data");
- Out << Name << " at " << label << '\n';
- Out << ".data " << label << " = {\n";
- uint64_t offset = 0;
- printStaticConstant(C,offset);
- Out << "\n}\n\n";
-}
-
-
-void MSILWriter::printVariableDefinition(const GlobalVariable* G) {
- const Constant* C = G->getInitializer();
- if (C->isNullValue() || isa<ConstantAggregateZero>(C) || isa<UndefValue>(C))
- InitListPtr = 0;
- else
- InitListPtr = &StaticInitList[G];
- printStaticInitializer(C,getValueName(G));
-}
-
-
-void MSILWriter::printGlobalVariables() {
- if (ModulePtr->global_empty()) return;
- Module::global_iterator I,E;
- for (I = ModulePtr->global_begin(), E = ModulePtr->global_end(); I!=E; ++I) {
- // Variable definition
- Out << ".field static " << (I->isDeclaration() ? "public " :
- "private ");
- if (I->isDeclaration()) {
- Out << getTypeName(I->getType()) << getValueName(&*I) << "\n\n";
- } else
- printVariableDefinition(&*I);
- }
-}
-
-
-const char* MSILWriter::getLibraryName(const Function* F) {
- return getLibraryForSymbol(F->getName(), true, F->getCallingConv());
-}
-
-
-const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
- return getLibraryForSymbol(GV->getName(), false, CallingConv::C);
-}
-
-
-const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction,
- CallingConv::ID CallingConv) {
-  // TODO: Read a *.def file with function and library definitions.
- return "MSVCRT.DLL";
-}
-
-
-void MSILWriter::printExternals() {
- Module::const_iterator I,E;
- // Functions.
- for (I=ModulePtr->begin(),E=ModulePtr->end(); I!=E; ++I) {
-    // Skip intrinsics.
- if (I->isIntrinsic()) continue;
- if (I->isDeclaration()) {
- const Function* F = I;
- std::string Name = getConvModopt(F->getCallingConv())+getValueName(F);
- std::string Sig =
- getCallSignature(cast<FunctionType>(F->getFunctionType()), NULL, Name);
- Out << ".method static hidebysig pinvokeimpl(\""
- << getLibraryName(F) << "\")\n\t" << Sig << " preservesig {}\n\n";
- }
- }
- // External variables and static initialization.
- Out <<
- ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
- " native int LoadLibrary(string) preservesig {}\n"
- ".method public hidebysig static pinvokeimpl(\"KERNEL32.DLL\" ansi winapi)"
- " native int GetProcAddress(native int, string) preservesig {}\n";
- Out <<
- ".method private static void* $MSIL_Import(string lib,string sym)\n"
- " managed cil\n{\n"
- "\tldarg\tlib\n"
- "\tcall\tnative int LoadLibrary(string)\n"
- "\tldarg\tsym\n"
- "\tcall\tnative int GetProcAddress(native int,string)\n"
- "\tdup\n"
- "\tbrtrue\tL_01\n"
-    "\tldstr\t\"Cannot import variable\"\n"
- "\tnewobj\tinstance void [mscorlib]System.Exception::.ctor(string)\n"
- "\tthrow\n"
- "L_01:\n"
- "\tret\n"
- "}\n\n"
- ".method static private void $MSIL_Init() managed cil\n{\n";
- printStaticInitializerList();
-  // For each global variable.
- for (Module::global_iterator I = ModulePtr->global_begin(),
- E = ModulePtr->global_end(); I!=E; ++I) {
- if (!I->isDeclaration() || !I->hasDLLImportLinkage()) continue;
-    // Use "LoadLibrary"/"GetProcAddress" to retrieve the variable's address.
- std::string Tmp = getTypeName(I->getType())+getValueName(&*I);
- printSimpleInstruction("ldsflda",Tmp.c_str());
- Out << "\tldstr\t\"" << getLibraryName(&*I) << "\"\n";
- Out << "\tldstr\t\"" << I->getName() << "\"\n";
- printSimpleInstruction("call","void* $MSIL_Import(string,string)");
- printIndirectSave(I->getType());
- }
- printSimpleInstruction("ret");
- Out << "}\n\n";
-}
-
-
-//===----------------------------------------------------------------------===//
-// External Interface declaration
-//===----------------------------------------------------------------------===//
-
-bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify)
-{
- if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
- MSILWriter* Writer = new MSILWriter(o);
- PM.add(createGCLoweringPass());
- // FIXME: Handle switch through native IL instruction "switch"
- PM.add(createLowerSwitchPass());
- PM.add(createCFGSimplificationPass());
- PM.add(new MSILModule(Writer->UsedTypes,Writer->TD));
- PM.add(Writer);
- PM.add(createGCInfoDeleter());
- return false;
-}
diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h
deleted file mode 100644
index 92a3abe5c0a74..0000000000000
--- a/lib/Target/MSIL/MSILWriter.h
+++ /dev/null
@@ -1,258 +0,0 @@
-//===-- MSILWriter.h - TargetMachine for the MSIL ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the MSILWriter that is used by the MSIL backend.
-//
-//===----------------------------------------------------------------------===//
-#ifndef MSILWRITER_H
-#define MSILWRITER_H
-
-#include "llvm/CallingConv.h"
-#include "llvm/Constants.h"
-#include "llvm/Module.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/GetElementPtrTypeIterator.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
- extern Target TheMSILTarget;
-
- class MSILModule : public ModulePass {
- Module *ModulePtr;
- const std::set<const Type *>*& UsedTypes;
- const TargetData*& TD;
-
- public:
- static char ID;
- MSILModule(const std::set<const Type *>*& _UsedTypes,
- const TargetData*& _TD)
- : ModulePass(&ID), UsedTypes(_UsedTypes), TD(_TD) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<FindUsedTypes>();
- AU.addRequired<TargetData>();
- }
-
- virtual const char *getPassName() const {
- return "MSIL backend definitions";
- }
-
- virtual bool runOnModule(Module &M);
-
- };
-
- class MSILWriter : public FunctionPass {
- struct StaticInitializer {
- const Constant* constant;
- uint64_t offset;
-
- StaticInitializer()
- : constant(0), offset(0) {}
-
- StaticInitializer(const Constant* _constant, uint64_t _offset)
- : constant(_constant), offset(_offset) {}
- };
-
- uint64_t UniqID;
-
- uint64_t getUniqID() {
- return ++UniqID;
- }
-
- public:
- formatted_raw_ostream &Out;
- Module* ModulePtr;
- const TargetData* TD;
- LoopInfo *LInfo;
- std::vector<StaticInitializer>* InitListPtr;
- std::map<const GlobalVariable*,std::vector<StaticInitializer> >
- StaticInitList;
- const std::set<const Type *>* UsedTypes;
- static char ID;
- DenseMap<const Value*, unsigned> AnonValueNumbers;
- unsigned NextAnonValueNumber;
-
- MSILWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o),
- NextAnonValueNumber(0) {
- UniqID = 0;
- }
-
- enum ValueType {
- UndefVT,
- GlobalVT,
- InternalVT,
- ArgumentVT,
- LocalVT,
- ConstVT,
- ConstExprVT
- };
-
- bool isVariable(ValueType V) {
- return V==GlobalVT || V==InternalVT || V==ArgumentVT || V==LocalVT;
- }
-
- bool isConstValue(ValueType V) {
- return V==ConstVT || V==ConstExprVT;
- }
-
- virtual const char *getPassName() const { return "MSIL backend"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
- AU.setPreservesAll();
- }
-
- bool runOnFunction(Function &F);
-
- virtual bool doInitialization(Module &M);
-
- virtual bool doFinalization(Module &M);
-
- void printModuleStartup();
-
- bool isZeroValue(const Value* V);
-
- std::string getValueName(const Value* V);
-
- std::string getLabelName(const Value* V);
-
- std::string getLabelName(const std::string& Name);
-
- std::string getConvModopt(CallingConv::ID CallingConvID);
-
- std::string getArrayTypeName(Type::TypeID TyID, const Type* Ty);
-
- std::string getPrimitiveTypeName(const Type* Ty, bool isSigned);
-
- std::string getFunctionTypeName(const Type* Ty);
-
- std::string getPointerTypeName(const Type* Ty);
-
- std::string getTypeName(const Type* Ty, bool isSigned = false,
- bool isNested = false);
-
- ValueType getValueLocation(const Value* V);
-
- std::string getTypePostfix(const Type* Ty, bool Expand,
- bool isSigned = false);
-
- void printConvToPtr();
-
- void printPtrLoad(uint64_t N);
-
- void printValuePtrLoad(const Value* V);
-
- void printConstLoad(const Constant* C);
-
- void printValueLoad(const Value* V);
-
- void printValueSave(const Value* V);
-
- void printBinaryInstruction(const char* Name, const Value* Left,
- const Value* Right);
-
- void printSimpleInstruction(const char* Inst, const char* Operand = NULL);
-
- void printPHICopy(const BasicBlock* Src, const BasicBlock* Dst);
-
- void printBranchToBlock(const BasicBlock* CurrBB,
- const BasicBlock* TrueBB,
- const BasicBlock* FalseBB);
-
- void printBranchInstruction(const BranchInst* Inst);
-
- void printSelectInstruction(const Value* Cond, const Value* VTrue,
- const Value* VFalse);
-
- void printIndirectLoad(const Value* V);
-
- void printIndirectSave(const Value* Ptr, const Value* Val);
-
- void printIndirectSave(const Type* Ty);
-
- void printCastInstruction(unsigned int Op, const Value* V,
- const Type* Ty, const Type* SrcTy=0);
-
- void printGepInstruction(const Value* V, gep_type_iterator I,
- gep_type_iterator E);
-
- std::string getCallSignature(const FunctionType* Ty,
- const Instruction* Inst,
- std::string Name);
-
- void printFunctionCall(const Value* FnVal, const Instruction* Inst);
-
- void printIntrinsicCall(const IntrinsicInst* Inst);
-
- void printCallInstruction(const Instruction* Inst);
-
- void printICmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right);
-
- void printFCmpInstruction(unsigned Predicate, const Value* Left,
- const Value* Right);
-
- void printInvokeInstruction(const InvokeInst* Inst);
-
- void printSwitchInstruction(const SwitchInst* Inst);
-
- void printVAArgInstruction(const VAArgInst* Inst);
-
- void printAllocaInstruction(const AllocaInst* Inst);
-
- void printInstruction(const Instruction* Inst);
-
- void printLoop(const Loop* L);
-
- void printBasicBlock(const BasicBlock* BB);
-
- void printLocalVariables(const Function& F);
-
- void printFunctionBody(const Function& F);
-
- void printConstantExpr(const ConstantExpr* CE);
-
- void printStaticInitializerList();
-
- void printFunction(const Function& F);
-
- void printDeclarations(const TypeSymbolTable& ST);
-
- unsigned int getBitWidth(const Type* Ty);
-
- void printStaticConstant(const Constant* C, uint64_t& Offset);
-
- void printStaticInitializer(const Constant* C, const std::string& Name);
-
- void printVariableDefinition(const GlobalVariable* G);
-
- void printGlobalVariables();
-
- const char* getLibraryName(const Function* F);
-
- const char* getLibraryName(const GlobalVariable* GV);
-
- const char* getLibraryForSymbol(StringRef Name, bool isFunction,
- CallingConv::ID CallingConv);
-
- void printExternals();
- };
-
-}
-
-#endif
-
diff --git a/lib/Target/MSIL/README.TXT b/lib/Target/MSIL/README.TXT
deleted file mode 100644
index d797c71fd39f0..0000000000000
--- a/lib/Target/MSIL/README.TXT
+++ /dev/null
@@ -1,26 +0,0 @@
-//===---------------------------------------------------------------------===//
-
-Support for vector instructions:
-
-ShuffleVector
-ExtractElement
-InsertElement
-
-//===---------------------------------------------------------------------===//
-
-Add "OpaqueType" type.
-
-//===---------------------------------------------------------------------===//
-
-"switch" instruction emulation with CLI "switch" instruction.
-
-//===---------------------------------------------------------------------===//
-
-Write a linker for external functions: exporting a function requires knowing
-which dynamic library the function is located in.
-
-.method static hidebysig pinvokeimpl("msvcrt.dll" cdecl)
- void free(void*) preservesig {}
-
-
-
diff --git a/lib/Target/MSIL/TargetInfo/CMakeLists.txt b/lib/Target/MSIL/TargetInfo/CMakeLists.txt
deleted file mode 100644
index 9f0c3a09341a9..0000000000000
--- a/lib/Target/MSIL/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMMSILInfo
- MSILTargetInfo.cpp
- )
-
diff --git a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp b/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp
deleted file mode 100644
index dfd42814e51cc..0000000000000
--- a/lib/Target/MSIL/TargetInfo/MSILTargetInfo.cpp
+++ /dev/null
@@ -1,26 +0,0 @@
-//===-- MSILTargetInfo.cpp - MSIL Target Implementation -------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MSILWriter.h"
-#include "llvm/Module.h"
-#include "llvm/Target/TargetRegistry.h"
-using namespace llvm;
-
-Target llvm::TheMSILTarget;
-
-static unsigned MSIL_TripleMatchQuality(const std::string &TT) {
- // This class always works, but shouldn't be the default in most cases.
- return 1;
-}
-
-extern "C" void LLVMInitializeMSILTargetInfo() {
- TargetRegistry::RegisterTarget(TheMSILTarget, "msil",
- "MSIL backend",
- &MSIL_TripleMatchQuality);
-}
diff --git a/lib/Target/MSIL/TargetInfo/Makefile b/lib/Target/MSIL/TargetInfo/Makefile
deleted file mode 100644
index 30b0950db0f75..0000000000000
--- a/lib/Target/MSIL/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/MSIL/TargetInfo/Makefile -----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMMSILInfo
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index 68cb342b08f45..bd644435c76fa 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -10,7 +10,7 @@
// This file contains a pass that scans a machine function to determine which
// conditional branches need more than 10 bits of displacement to reach their
// target basic block. It does this in two passes; a calculation of basic block
-// positions pass, and a branch psuedo op to machine branch opcode pass. This
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
// pass should be run last, just before the assembly printer.
//
//===----------------------------------------------------------------------===//
@@ -30,7 +30,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace {
struct MSP430BSel : public MachineFunctionPass {
static char ID;
- MSP430BSel() : MachineFunctionPass(&ID) {}
+ MSP430BSel() : MachineFunctionPass(ID) {}
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -52,7 +52,8 @@ FunctionPass *llvm::createMSP430BranchSelectionPass() {
}
bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const MSP430InstrInfo *TII =
+ static_cast<const MSP430InstrInfo*>(Fn.getTarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index df28d07f5d717..bfab844f5b1a7 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -100,27 +100,6 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
bool
-MSP430InstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers yet.
-
- switch (MI.getOpcode()) {
- default:
- return false;
- case MSP430::MOV8rr:
- case MSP430::MOV16rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- }
-}
-
-bool
MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
@@ -361,7 +340,7 @@ unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
switch (Desc.getOpcode()) {
default:
assert(0 && "Unknown instruction size!");
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index ebbda1aeef513..49ccc032bf29f 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -54,10 +54,6 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- bool isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 608ca49fcf78e..3c3fa73477a59 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -101,7 +101,7 @@ bool MSP430RegisterInfo::hasFP(const MachineFunction &MF) const {
MFI->isFrameAddressTaken());
}
-bool MSP430RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool MSP430RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
@@ -163,10 +163,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -204,7 +203,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(BasePtr, false);
if (Offset == 0)
- return 0;
+ return;
// We need to materialize the offset via add instruction.
unsigned DstReg = MI.getOperand(0).getReg();
@@ -215,12 +214,11 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
.addReg(DstReg).addImm(Offset);
- return 0;
+ return;
}
MI.getOperand(i).ChangeToRegister(BasePtr, false);
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
}
void
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 6e58d3116d273..4d2795bb40201 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -40,15 +40,14 @@ public:
const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
bool hasFP(const MachineFunction &MF) const;
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 2037a9114559e..49efe75d79d8f 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -180,7 +180,8 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
ManglerPrefixTy PrefixTy = Mangler::Default;
if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
PrefixTy = Mangler::Private;
- else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage() ||
+ GV->hasLinkerPrivateWeakDefAutoLinkage())
PrefixTy = Mangler::LinkerPrivate;
// If this global has a name, handle it simply.
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 8ae05b75e919d..6660f6b624309 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -18,6 +18,8 @@
#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
#include "MipsMachineFunction.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -75,6 +77,7 @@ namespace {
}
virtual void EmitFunctionBodyStart();
virtual void EmitFunctionBodyEnd();
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
static const char *getRegisterName(unsigned RegNo);
virtual void EmitFunctionEntryLabel();
@@ -227,6 +230,23 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() {
}
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const {
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *MBB->pred_begin();
+
+ // If the predecessor is a switch statement, assume a jump table
+ // implementation, so it is not a fall through.
+ if (const BasicBlock *bb = Pred->getBasicBlock())
+ if (isa<SwitchInst>(bb->getTerminator()))
+ return false;
+
+ return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB);
+}
+
// Print out an operand for an inline asm expression.
bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,const char *ExtraCode,
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index aa036aef83d0b..a51c3779c7f4f 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -1,4 +1,4 @@
-//===- Mips.td - Describe the Mips Target Machine ---------------*- C++ -*-===//
+//===- Mips.td - Describe the Mips Target Machine ----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index c2bfb8fa738c6..8f313efaf8daa 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -1,4 +1,4 @@
-//===- MipsCallingConv.td - Calling Conventions for Mips --------*- C++ -*-===//
+//===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index a2b615d8add2a..597ea0d6c2072 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -32,7 +32,7 @@ namespace {
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
return "Mips Delay Slot Filler";
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 3888bbf09ec7a..a47cf7b4f201e 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -137,7 +137,7 @@ SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base)
// Operand is a result from an ADD.
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (Predicate_immSExt16(CN)) {
+ if (isInt<16>(CN->getSExtValue())) {
// If the first operand is a FI, get the TargetFI Node
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
@@ -184,8 +184,9 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
if (!Subtarget.isMips1() || NVT != MVT::f64)
return NULL;
- if (!Predicate_unindexedload(N) ||
- !Predicate_load(N))
+ LoadSDNode *LN = cast<LoadSDNode>(N);
+ if (LN->getExtensionType() != ISD::NON_EXTLOAD ||
+ LN->getAddressingMode() != ISD::UNINDEXED)
return NULL;
SDValue Chain = N->getOperand(0);
@@ -248,8 +249,8 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
SDValue Chain = N->getOperand(0);
- if (!Predicate_unindexedstore(N) ||
- !Predicate_store(N))
+ StoreSDNode *SN = cast<StoreSDNode>(N);
+ if (SN->isTruncatingStore() || SN->getAddressingMode() != ISD::UNINDEXED)
return NULL;
SDValue N1 = N->getOperand(1);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index b6ff2c371d5c9..b0b99bad16071 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -317,13 +317,13 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BB->addSuccessor(sinkMBB);
// sinkMBB:
- // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ]
// ...
BB = sinkMBB;
BuildMI(*BB, BB->begin(), dl,
TII->get(Mips::PHI), MI->getOperand(0).getReg())
- .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
- .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB)
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -542,7 +542,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
- if (IsPIC) {
+ if (!IsPIC) {
SDValue Ops[] = { JTI };
HiPart = DAG.getNode(MipsISD::Hi, dl, DAG.getVTList(MVT::i32), Ops, 1);
} else // Emit Load from Global Pointer
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index e948917eb80eb..cff79966dcd3b 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -1,4 +1,4 @@
-//===- MipsInstrFPU.td - Mips FPU Instruction Information -------*- C++ -*-===//
+//===- MipsInstrFPU.td - Mips FPU Instruction Information --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index 0853272f7280e..98ae2fa7da456 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//===- MipsInstrFormats.td - Mips Instruction Formats ------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 6c09a3e10785b..aaf307b1ce3ff 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -30,53 +30,6 @@ static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-bool MipsInstrInfo::
-isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const
-{
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- // addu $dst, $src, $zero || addu $dst, $zero, $src
- // or $dst, $src, $zero || or $dst, $zero, $src
- if ((MI.getOpcode() == Mips::ADDu) || (MI.getOpcode() == Mips::OR)) {
- if (MI.getOperand(1).getReg() == Mips::ZERO) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- return true;
- } else if (MI.getOperand(2).getReg() == Mips::ZERO) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- // mov $fpDst, $fpSrc
- // mfc $gpDst, $fpSrc
- // mtc $fpDst, $gpSrc
- if (MI.getOpcode() == Mips::FMOV_S32 ||
- MI.getOpcode() == Mips::FMOV_D32 ||
- MI.getOpcode() == Mips::MFC1 ||
- MI.getOpcode() == Mips::MTC1 ||
- MI.getOpcode() == Mips::MOVCCRToCCR) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
-
- // addiu $dst, $src, 0
- if (MI.getOpcode() == Mips::ADDiu) {
- if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- }
-
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index d6f87f9b0ce82..52a3d39840ba6 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -174,12 +174,6 @@ public:
///
virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
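The per-target isMoveInstr hooks removed here and in the other backends below are superseded by the target-independent COPY machine instruction: clients that used to ask each target whether an instruction is a register-to-register move can now test for COPY generically. A sketch of the generic idiom, assuming the MachineInstr API of this era:

    // Generic replacement for the old per-target isMoveInstr query.
    if (MI.isCopy()) {
      unsigned DstReg = MI.getOperand(0).getReg();
      unsigned SrcReg = MI.getOperand(1).getReg();
      // Sub-register indices now live on the operands themselves.
      unsigned DstSubIdx = MI.getOperand(0).getSubReg();
      unsigned SrcSubIdx = MI.getOperand(1).getSubReg();
      // ... use DstReg/SrcReg/DstSubIdx/SrcSubIdx ...
    }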
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 5337c9fb816a2..320c5b8834831 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1,4 +1,4 @@
-//===- MipsInstrInfo.td - Mips Register defs --------------------*- C++ -*-===//
+//===- MipsInstrInfo.td - Mips Instruction defs ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -96,12 +96,7 @@ def HI16 : SDNodeXForm<imm, [{
// Node immediate fits as 16-bit sign extended on target immediate.
// e.g. addi, andi
-def immSExt16 : PatLeaf<(imm), [{
- if (N->getValueType(0) == MVT::i32)
- return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
- else
- return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
-}]>;
+def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
// Node immediate fits as 16-bit zero extended on target immediate.
// The LO16 param means that only the lower 16 bits of the node
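The rewritten immSExt16 (and the analogous Sparc simm11/simm13 predicates later in this patch) delegate to the isInt<N> helper from llvm/Support/MathExtras.h, which tests whether a 64-bit value is representable in an N-bit signed field. Its semantics, as a self-contained sketch rather than the actual header:

    #include <cstdint>

    // True if x fits in an N-bit two's-complement integer (0 < N < 64).
    template <unsigned N>
    static bool isIntSketch(int64_t x) {
      return x >= -(int64_t(1) << (N - 1)) && x < (int64_t(1) << (N - 1));
    }

    // isIntSketch<16>(32767)  -> true    isIntSketch<16>(32768)  -> false
    // isIntSketch<16>(-32768) -> true    isIntSketch<16>(-32769) -> false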
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index e15f0a58e501b..69436d2acb546 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -327,10 +327,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// FrameIndexes represent objects inside an abstract stack.
// We must replace FrameIndex with a stack/frame pointer
// direct reference.
-unsigned MipsRegisterInfo::
+void MipsRegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value, RegScavenger *RS) const
-{
+ RegScavenger *RS) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
@@ -361,7 +360,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MI.getOperand(i-1).ChangeToImmediate(Offset);
MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
- return 0;
}
void MipsRegisterInfo::
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index b500a650f7cc9..89282f8fa1465 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -51,9 +51,8 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
MachineBasicBlock::iterator I) const;
/// Stack Frame Processing Methods
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index be78a2266268c..60efe31fbaf82 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -1,4 +1,4 @@
-//===- MipsRegisterInfo.td - Mips Register defs -----------------*- C++ -*-===//
+//===- MipsRegisterInfo.td - Mips Register defs ------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td
index 616a79bf831cb..055ff32372184 100644
--- a/lib/Target/Mips/MipsSchedule.td
+++ b/lib/Target/Mips/MipsSchedule.td
@@ -1,4 +1,4 @@
-//===- MipsSchedule.td - Mips Scheduling Definitions ------------*- C++ -*-===//
+//===- MipsSchedule.td - Mips Scheduling Definitions -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PIC16/CMakeLists.txt b/lib/Target/PIC16/CMakeLists.txt
index cd4afe8e2342a..2b6cb9e4e461d 100644
--- a/lib/Target/PIC16/CMakeLists.txt
+++ b/lib/Target/PIC16/CMakeLists.txt
@@ -10,7 +10,7 @@ tablegen(PIC16GenDAGISel.inc -gen-dag-isel)
tablegen(PIC16GenCallingConv.inc -gen-callingconv)
tablegen(PIC16GenSubtarget.inc -gen-subtarget)
-add_llvm_target(PIC16
+add_llvm_target(PIC16CodeGen
PIC16DebugInfo.cpp
PIC16InstrInfo.cpp
PIC16ISelDAGToDAG.cpp
diff --git a/lib/Target/PIC16/PIC16.h b/lib/Target/PIC16/PIC16.h
index cee55f4f260fe..08bb3e6f055b6 100644
--- a/lib/Target/PIC16/PIC16.h
+++ b/lib/Target/PIC16/PIC16.h
@@ -58,13 +58,10 @@ namespace PIC16CC {
ESNames() {}
public:
~ESNames() {
- std::vector<char*>::iterator it = stk.end();
- it--;
- while(stk.end() != stk.begin())
+ while (!stk.empty())
{
- char* p = *it;
+ char* p = stk.back();
delete [] p;
- it--;
stk.pop_back();
}
}
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 54a6a28992bf9..527b31d0cc9f3 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -312,6 +312,16 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
computeRegisterProperties();
}
+std::pair<const TargetRegisterClass*, uint8_t>
+PIC16TargetLowering::findRepresentativeClass(EVT VT) const {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i16:
+ return std::make_pair(PIC16::FSR16RegisterClass, 1);
+ }
+}
+
// getOutFlag - Extract the flag result if the Op has it.
static SDValue getOutFlag(SDValue &Op) {
// Flag is the last value of the node.
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
index 0a7506cb497f5..d942af46a9e92 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -50,7 +50,7 @@ namespace llvm {
CALL, // PIC16 Call instruction
CALLW, // PIC16 CALLW instruction
SUBCC, // Compare for equality or inequality.
- SELECT_ICC, // Psuedo to be caught in schedular and expanded to brcond.
+ SELECT_ICC, // Pseudo to be caught in scheduler and expanded to brcond.
BRCOND, // Conditional branch.
RET, // Return.
Dummy
@@ -181,6 +181,9 @@ namespace llvm {
// FIXME: The function never seems to be aligned.
return 1;
}
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
private:
// If the Node is a BUILD_PAIR representing a direct Address,
// then this function will return true.
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index e784f746f7f9b..81257f3c41083 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -167,21 +167,6 @@ void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DestReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
- if (MI.getOpcode() == PIC16::copy_fsr
- || MI.getOpcode() == PIC16::copy_w) {
- DestReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
-
- return false;
-}
-
/// InsertBranch - Insert a branch into the end of the specified
/// MachineBasicBlock. The operands to this method are the same as those
/// returned by AnalyzeBranch. This is invoked in cases where AnalyzeBranch
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
index a3a77f11ba160..661b335d3b6c5 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -61,10 +61,6 @@ public:
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
virtual
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index 241170b11c2ad..b6aa38f765ea7 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
namespace {
struct MemSelOpt : public MachineFunctionPass {
static char ID;
- MemSelOpt() : MachineFunctionPass(&ID) {}
+ MemSelOpt() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(MachineLoopInfoID);
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
index 27f1cf572ae6c..56f0211570928 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
@@ -256,7 +256,7 @@ PIC16Cloner::cloneFunction(Function *OrgF) {
CloneAutos(OrgF);
// Now create the clone.
- ClonedF = CloneFunction(OrgF, VMap);
+ ClonedF = CloneFunction(OrgF, VMap, /*ModuleLevelChanges=*/false);
// The new function should be for interrupt line. Therefore should have
// the name suffixed with IL and section attribute marked with IL.
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
index e8b5aa45cdca9..e7d67ce09629a 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
@@ -35,7 +35,7 @@ namespace llvm {
class PIC16Cloner : public ModulePass {
public:
static char ID; // Class identification
- PIC16Cloner() : ModulePass(&ID) {}
+ PIC16Cloner() : ModulePass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<CallGraph>();
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
index 5ecb6aa551576..0f8928a4b5f50 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
@@ -171,8 +171,9 @@ void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) {
for (Module::iterator MI = M.begin(), E = M.end(); MI != E; ++MI) {
for (Value::use_iterator I = MI->use_begin(), E = MI->use_end(); I != E;
++I) {
- if ((!isa<CallInst>(I) && !isa<InvokeInst>(I))
- || !CallSite(cast<Instruction>(I)).isCallee(I)) {
+ User *U = *I;
+ if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ || !CallSite(cast<Instruction>(U)).isCallee(I)) {
setColor(MI, ++IndirectCallColor);
break;
}
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
index 5a2551fabcda9..2f611e65de1fd 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
@@ -39,7 +39,7 @@ namespace llvm {
unsigned IndirectCallColor;
public:
static char ID; // Class identification
- PIC16Overlay() : ModulePass(&ID) {
+ PIC16Overlay() : ModulePass(ID) {
OverlayStr = "Overlay=";
InterruptDepth = PIC16OVERLAY::StartInterruptColor;
IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor;
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
index dff98d12c2ae0..76de47fdf0f4a 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -44,13 +44,10 @@ bool PIC16RegisterInfo::hasFP(const MachineFunction &MF) const {
return false;
}
-unsigned PIC16RegisterInfo::
+void PIC16RegisterInfo::
eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- FrameIndexValue *Value, RegScavenger *RS) const
-{
- /* NOT YET IMPLEMENTED */
- return 0;
-}
+ RegScavenger *RS) const
+{ /* NOT YET IMPLEMENTED */ }
void PIC16RegisterInfo::emitPrologue(MachineFunction &MF) const
{ /* NOT YET IMPLEMENTED */ }
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
index 5536a617d2beb..20052b0034428 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -44,9 +44,8 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo {
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
virtual bool hasFP(const MachineFunction &MF) const;
- virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS=NULL) const;
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS=NULL) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index e35dc579f2cd5..c1a5663be9315 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -43,6 +43,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -327,6 +328,19 @@ namespace {
void printPredicateOperand(const MachineInstr *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
+
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+ }
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 52948c868b9c9..e161d23600e27 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -10,7 +10,7 @@
// This file contains a pass that scans a machine function to determine which
// conditional branches need more than 16 bits of displacement to reach their
// target basic block. It does this in two passes; a calculation of basic block
-// positions pass, and a branch psuedo op to machine branch opcode pass. This
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
// pass should be run last, just before the assembly printer.
//
//===----------------------------------------------------------------------===//
@@ -31,7 +31,7 @@ STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace {
struct PPCBSel : public MachineFunctionPass {
static char ID;
- PPCBSel() : MachineFunctionPass(&ID) {}
+ PPCBSel() : MachineFunctionPass(ID) {}
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
@@ -53,7 +53,8 @@ FunctionPass *llvm::createPPCBranchSelectionPass() {
}
bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
// Give the blocks of the function a dense, in-order, numbering.
Fn.RenumberBlocks();
BlockSizes.resize(Fn.getNumBlockIDs());
diff --git a/lib/Target/PowerPC/PPCCallingConv.td b/lib/Target/PowerPC/PPCCallingConv.td
index 155fba22d9d7b..441db94581aea 100644
--- a/lib/Target/PowerPC/PPCCallingConv.td
+++ b/lib/Target/PowerPC/PPCCallingConv.td
@@ -1,4 +1,4 @@
-//===- PPCCallingConv.td - Calling Conventions for PowerPC ------*- C++ -*-===//
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 361fa70fb4c45..df9ab52389ba8 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -45,7 +45,7 @@ namespace {
public:
PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(&ID), TM(tm), MCE(mce) {}
+ : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
@@ -110,7 +110,7 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
default:
MCE.emitWordBE(getBinaryCodeForInstr(MI));
break;
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index d47d989b34c00..14d1b154a5c9c 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -2467,18 +2467,31 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
unsigned CallOpc = isSVR4ABI ? PPCISD::CALL_SVR4 : PPCISD::CALL_Darwin;
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
- // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
- Callee.getValueType());
- else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
- else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
+ bool needIndirectCall = true;
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
// If this is an absolute destination address, use the munged value.
Callee = SDValue(Dest, 0);
- else {
+ needIndirectCall = false;
+ }
+ // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+ // Use indirect calls for ALL function calls in JIT mode, since the
+ // far-call stubs may be outside relocation limits for a BL instruction.
+ if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType());
+ needIndirectCall = false;
+ }
+ }
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+ needIndirectCall = false;
+ }
+ if (needIndirectCall) {
// Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
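The relocation limit mentioned in the workaround comes from the encoding of the PowerPC bl instruction: an I-form branch carries a 24-bit signed displacement scaled by four, so a direct call can only reach roughly +/-32 MiB from the call site, and JIT-allocated far-call stubs may land outside that window. The arithmetic, as a quick check:

    // Reach of a direct PPC 'bl': 24-bit signed displacement, scaled by 4.
    int64_t MaxForward  = ((int64_t(1) << 23) - 1) * 4;  //  33554428 bytes
    int64_t MaxBackward = -(int64_t(1) << 23) * 4;       // -33554432 bytes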
@@ -3942,17 +3955,17 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
}
// t = vsplti c, result = vsldoi t, t, 1
- if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
+ if (SextVal == ((i << 8) | (i < 0 ? 0xFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 2
- if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
+ if (SextVal == ((i << 16) | (i < 0 ? 0xFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
}
// t = vsplti c, result = vsldoi t, t, 3
- if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
+ if (SextVal == ((i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
}
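The vsldoi matches above previously reconstructed the rotated constant with an arithmetic right shift by TypeShiftAmt minus 8, 16, or 24 — a count that can be zero or negative depending on the splat size, and a negative shift count is undefined behavior in C++. The rewrite appends the sign-fill bytes explicitly instead. A worked instance, assuming i is the sign-extended splat constant:

    // vsldoi-by-1 case with a negative splat value, e.g. i = -2:
    //   (i << 8) | 0xFF  =  (0xFFFFFFFE << 8) | 0xFF  =  0xFFFFFEFF
    // The low byte is filled with explicit sign bits rather than the result
    // of a shift whose count may be zero or negative.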
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 1574aa3fb23a5..c17108fa92309 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -18,8 +18,11 @@
#include "PPCGenInstrInfo.inc"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -36,67 +39,6 @@ PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
RI(*TM.getSubtargetImpl(), *this) {}
-bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned& sourceReg,
- unsigned& destReg,
- unsigned& sourceSubIdx,
- unsigned& destSubIdx) const {
- sourceSubIdx = destSubIdx = 0; // No sub-registers.
-
- unsigned oc = MI.getOpcode();
- if (oc == PPC::OR || oc == PPC::OR8 || oc == PPC::VOR ||
- oc == PPC::OR4To8 || oc == PPC::OR8To4) { // or r1, r2, r2
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isReg() &&
- "invalid PPC OR instruction!");
- if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- } else if (oc == PPC::ADDI) { // addi r1, r2, 0
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid PPC ADDI instruction!");
- if (MI.getOperand(1).isReg() && MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- } else if (oc == PPC::ORI) { // ori r1, r2, 0
- assert(MI.getNumOperands() >= 3 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- MI.getOperand(2).isImm() &&
- "invalid PPC ORI instruction!");
- if (MI.getOperand(2).getImm() == 0) {
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid PPC FMR instruction");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- } else if (oc == PPC::MCRF) { // mcrf cr1, cr2
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid PPC MCRF instruction");
- sourceReg = MI.getOperand(1).getReg();
- destReg = MI.getOperand(0).getReg();
- return true;
- }
- return false;
-}
-
unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
@@ -524,6 +466,14 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, /*Offset=*/0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
}
void
@@ -637,6 +587,14 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, /*Offset=*/0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
}
MachineInstr*
@@ -667,7 +625,7 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const char *AsmStr = MI->getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
}
- case PPC::DBG_LABEL:
+ case PPC::PROLOG_LABEL:
case PPC::EH_LABEL:
case PPC::GC_LABEL:
case PPC::DBG_VALUE:
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index eadb21e217024..fc7b7b3cb8972 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -82,12 +82,6 @@ public:
///
virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
unsigned isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 63b4581a37f9a..eb100ec75280a 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1022,9 +1022,7 @@ let Uses = [RM] in {
}
}
-/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending.
-///
-/// Note that these are defined as pseudo-ops on the PPC970 because they are
+/// Note that FMR is defined as a pseudo-op on the PPC970 because such moves are
/// often coalesced away and we don't want the dispatch group builder to think
/// that they will fill slots (which could cause the load of a LSU reject to
/// sneak into a d-group with a store).
@@ -1032,10 +1030,6 @@ def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
"fmr $frD, $frB", FPGeneral,
[]>, // (set F4RC:$frD, F4RC:$frB)
PPC970_Unit_Pseudo;
-def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB),
- "fmr $frD, $frB", FPGeneral,
- [(set F8RC:$frD, (fextend F4RC:$frB))]>,
- PPC970_Unit_Pseudo;
let PPC970_Unit = 3 in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
@@ -1476,10 +1470,13 @@ def : Pat<(extloadi16 iaddr:$src),
(LHZ iaddr:$src)>;
def : Pat<(extloadi16 xaddr:$src),
(LHZX xaddr:$src)>;
-def : Pat<(extloadf32 iaddr:$src),
- (FMRSD (LFS iaddr:$src))>;
-def : Pat<(extloadf32 xaddr:$src),
- (FMRSD (LFSX xaddr:$src))>;
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fextend F4RC:$src)),
+ (COPY_TO_REGCLASS F4RC:$src, F8RC)>;
// Memory barriers
def : Pat<(membarrier (i32 imm /*ll*/),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 4d6132a9ec50c..653e143ba407f 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -449,8 +449,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
// Get stack alignments.
unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
- assert(MaxAlign <= TargetAlign &&
- "Dynamic alloca with large aligns not supported");
+ if (MaxAlign > TargetAlign)
+ report_fatal_error("Dynamic alloca with large aligns not supported");
// Determine the previous frame's address. If FrameSize can't be
// represented as 16 bits or we need special alignment, then we load the
@@ -580,10 +580,9 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
MBB.erase(II);
}
-unsigned
+void
PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
// Get the instruction.
@@ -622,14 +621,14 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (FPSI && FrameIndex == FPSI &&
(OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
lowerDynamicAlloc(II, SPAdj, RS);
- return 0;
+ return;
}
// Special case for pseudo-op SPILL_CR.
if (EnableRegisterScavenging) // FIXME (64-bit): Enable by default.
if (OpC == PPC::SPILL_CR) {
lowerCRSpilling(II, FrameIndex, SPAdj, RS);
- return 0;
+ return;
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
@@ -674,7 +673,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
- return 0;
+ return;
}
// The offset doesn't fit into a single register, scavenge one to build the
@@ -710,11 +709,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else {
OperandBase = OffsetOperandNo;
}
-
+
unsigned StackReg = MI.getOperand(FIOperandNo).getReg();
MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false);
- return 0;
}
/// VRRegNo - Map from a numbered VR register to its enum value.
@@ -1318,7 +1316,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
// Show update of SP.
if (NegFrameSize) {
@@ -1361,7 +1359,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
ReadyLabel = MMI.getContext().CreateTempSymbol();
// Mark effective beginning of when frame pointer is ready.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addSym(ReadyLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
(isPPC64 ? PPC::X1 : PPC::R1));
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index f026847a540b3..890b24b9c0a8b 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -63,9 +63,8 @@ public:
int SPAdj, RegScavenger *RS) const;
void lowerCRSpilling(MachineBasicBlock::iterator II, unsigned FrameIndex,
int SPAdj, RegScavenger *RS) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 40914ba62a70e..5d46065d96f22 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -69,6 +69,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
, HasFSQRT(false)
, HasSTFIWX(false)
, HasLazyResolverStubs(false)
+ , IsJITCodeModel(false)
, DarwinVers(0) {
// Determine default and user specified characteristics
@@ -117,6 +118,9 @@ void PPCSubtarget::SetJITMode() {
// everything is. This matters for PPC64, which codegens in PIC mode without
// stubs.
HasLazyResolverStubs = false;
+
+ // Calls to external functions need to use indirect calls
+ IsJITCodeModel = true;
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 75fcf6238a27a..00ec7474c9e39 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -63,6 +63,7 @@ protected:
bool HasFSQRT;
bool HasSTFIWX;
bool HasLazyResolverStubs;
+ bool IsJITCodeModel;
/// DarwinVers - Nonzero if this is a darwin platform. Otherwise, the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -124,6 +125,9 @@ public:
bool hasLazyResolverStub(const GlobalValue *GV,
const TargetMachine &TM) const;
+ // isJITCodeModel - True if we're generating code for the JIT
+ bool isJITCodeModel() const { return IsJITCodeModel; }
+
// Specific obvious features.
bool hasFSQRT() const { return HasFSQRT; }
bool hasSTFIWX() const { return HasSTFIWX; }
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 4d7ee08de1dee..4faf8bcfd4199 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1919,5 +1919,21 @@ something like the following, which eliminates a branch:
ret
.LBB0_2:
jmp foo # TAILCALL
+//===---------------------------------------------------------------------===//
+Given a branch where the two target blocks are identical ("ret i32 %b" in
+both), simplifycfg will simplify them away. But not so for a switch statement:
+
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+ switch i32 %a, label %bb3 [
+ i32 4, label %bb
+ i32 6, label %bb
+ ]
+bb: ; preds = %entry, %entry
+ ret i32 %b
+
+bb3: ; preds = %entry
+ ret i32 %b
+}
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 9e148ada8853e..aae5da8560056 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -32,7 +32,7 @@ namespace {
static char ID;
Filler(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { }
virtual const char *getPassName() const {
return "SPARC Delay Slot Filler";
diff --git a/lib/Target/Sparc/FPMover.cpp b/lib/Target/Sparc/FPMover.cpp
index 88b0927b35500..1423b1e64d66e 100644
--- a/lib/Target/Sparc/FPMover.cpp
+++ b/lib/Target/Sparc/FPMover.cpp
@@ -36,7 +36,7 @@ namespace {
static char ID;
explicit FPMover(TargetMachine &tm)
- : MachineFunctionPass(&ID), TM(tm) { }
+ : MachineFunctionPass(ID), TM(tm) { }
virtual const char *getPassName() const {
return "Sparc Double-FP Move Fixer";
diff --git a/lib/Target/Sparc/Sparc.td b/lib/Target/Sparc/Sparc.td
index 925d782d988be..764336665d0bb 100644
--- a/lib/Target/Sparc/Sparc.td
+++ b/lib/Target/Sparc/Sparc.td
@@ -1,4 +1,4 @@
-//===- Sparc.td - Describe the Sparc Target Machine -------------*- C++ -*-===//
+//===- Sparc.td - Describe the Sparc Target Machine --------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 698923e3c9e08..4ea94c4cb560f 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -84,7 +84,7 @@ bool SparcDAGToDAGISel::SelectADDRri(SDNode *Op, SDValue Addr,
if (Addr.getOpcode() == ISD::ADD) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- if (Predicate_simm13(CN)) {
+ if (isInt<13>(CN->getSExtValue())) {
if (FrameIndexSDNode *FIN =
dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
// Constant offset from frame ref.
@@ -120,9 +120,9 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr,
return false; // direct calls.
if (Addr.getOpcode() == ISD::ADD) {
- if (isa<ConstantSDNode>(Addr.getOperand(1)) &&
- Predicate_simm13(Addr.getOperand(1).getNode()))
- return false; // Let the reg+imm pattern catch this!
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
+ if (isInt<13>(CN->getSExtValue()))
+ return false; // Let the reg+imm pattern catch this!
if (Addr.getOperand(0).getOpcode() == SPISD::Lo ||
Addr.getOperand(1).getOpcode() == SPISD::Lo)
return false; // Let the reg+imm pattern catch this!
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 3a4c80ad076a4..7ede8e7ebbe46 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -28,46 +28,6 @@ SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
RI(ST, *this), Subtarget(ST) {
}
-static bool isZeroImm(const MachineOperand &op) {
- return op.isImm() && op.getImm() == 0;
-}
-
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool SparcInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSR, unsigned &DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- // We look for 3 kinds of patterns here:
- // or with G0 or 0
- // add with G0 or 0
- // fmovs or FpMOVD (pseudo double move).
- if (MI.getOpcode() == SP::ORrr || MI.getOpcode() == SP::ADDrr) {
- if (MI.getOperand(1).getReg() == SP::G0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(2).getReg();
- return true;
- } else if (MI.getOperand(2).getReg() == SP::G0) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- } else if ((MI.getOpcode() == SP::ORri || MI.getOpcode() == SP::ADDri) &&
- isZeroImm(MI.getOperand(2)) && MI.getOperand(1).isReg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- } else if (MI.getOpcode() == SP::FMOVS || MI.getOpcode() == SP::FpMOVD ||
- MI.getOpcode() == SP::FMOVD) {
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- return true;
- }
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 133471857bad6..c00bd2198765c 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -43,12 +43,6 @@ public:
///
virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index ddadd51a93a42..467ed48487adf 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -43,17 +43,9 @@ def UseDeprecatedInsts : Predicate<"Subtarget.useDeprecatedV8Instructions()">;
// Instruction Pattern Stuff
//===----------------------------------------------------------------------===//
-def simm11 : PatLeaf<(imm), [{
- // simm11 predicate - True if the imm fits in a 11-bit sign extended field.
- return (((int)N->getZExtValue() << (32-11)) >> (32-11)) ==
- (int)N->getZExtValue();
-}]>;
+def simm11 : PatLeaf<(imm), [{ return isInt<11>(N->getSExtValue()); }]>;
-def simm13 : PatLeaf<(imm), [{
- // simm13 predicate - True if the imm fits in a 13-bit sign extended field.
- return (((int)N->getZExtValue() << (32-13)) >> (32-13)) ==
- (int)N->getZExtValue();
-}]>;
+def simm13 : PatLeaf<(imm), [{ return isInt<13>(N->getSExtValue()); }]>;
def LO10 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant((unsigned)N->getZExtValue() & 1023,
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 427cc7fd45774..c85db20d2b748 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -69,10 +69,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -108,7 +107,6 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i).ChangeToRegister(SP::G1, false);
MI.getOperand(i+1).ChangeToImmediate(Offset & ((1 << 10)-1));
}
- return 0;
}
void SparcRegisterInfo::
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 9f0cda707b3ec..020ce567c9568 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -40,9 +40,8 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c03864fe41e4f..367bed3a85395 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -141,31 +141,6 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-bool
-SystemZInstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case SystemZ::MOV32rr:
- case SystemZ::MOV64rr:
- case SystemZ::MOV64rrP:
- case SystemZ::MOV128rr:
- case SystemZ::FMOV32rr:
- case SystemZ::FMOV64rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
-}
-
unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
switch (MI->getOpcode()) {
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 0559619248a63..c248f2489c493 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -65,9 +65,6 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const;
- bool isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index ae96b0b08ff62..f8d3e6ac8a6fb 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -92,10 +92,9 @@ int SystemZRegisterInfo::getFrameIndexOffset(const MachineFunction &MF,
return Offset;
}
-unsigned
+void
SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unxpected");
unsigned i = 0;
@@ -117,13 +116,13 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Offset is either a 12-bit unsigned or a 20-bit signed integer.
// FIXME: handle "too long" displacements.
- int Offset = getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
+ int Offset =
+ getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm();
// Check whether displacement is too long to fit into 12 bit zext field.
MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset));
MI.getOperand(i+1).ChangeToImmediate(Offset);
- return 0;
}
void
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 670025f86e08f..5dae865cb79a0 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -34,7 +34,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
BitVector getReservedRegs(const MachineFunction &MF) const;
- bool hasReservedCallFrame(MachineFunction &MF) const { return true; }
+ bool hasReservedCallFrame(const MachineFunction &MF) const { return true; }
bool hasFP(const MachineFunction &MF) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
@@ -43,9 +43,8 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 5870d8a87004d..f35c96dadcee5 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -34,8 +34,7 @@ using namespace llvm;
// Handle the Pass registration stuff necessary to use TargetData's.
// Register the default SparcV9 implementation...
-static RegisterPass<TargetData> X("targetdata", "Target Data Layout", false,
- true);
+INITIALIZE_PASS(TargetData, "targetdata", "Target Data Layout", false, true);
char TargetData::ID = 0;
//===----------------------------------------------------------------------===//
@@ -98,8 +97,8 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
//===----------------------------------------------------------------------===//
TargetAlignElem
-TargetAlignElem::get(AlignTypeEnum align_type, unsigned char abi_align,
- unsigned char pref_align, uint32_t bit_width) {
+TargetAlignElem::get(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
TargetAlignElem retval;
retval.AlignType = align_type;
@@ -197,10 +196,10 @@ void TargetData::init(StringRef Desc) {
}
unsigned Size = getInt(Specifier.substr(1));
Split = Token.split(':');
- unsigned char ABIAlign = getInt(Split.first) / 8;
+ unsigned ABIAlign = getInt(Split.first) / 8;
Split = Split.second.split(':');
- unsigned char PrefAlign = getInt(Split.first) / 8;
+ unsigned PrefAlign = getInt(Split.first) / 8;
if (PrefAlign == 0)
PrefAlign = ABIAlign;
setAlignment(AlignType, ABIAlign, PrefAlign, Size);
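For context on what this loop consumes: a target data string describes each type as a size:abi:pref triple measured in bits, which is why both alignment fields are divided by 8 above. A hypothetical specifier decoded under this code:

    // Specifier "i32:32:64" (32-bit integer type):
    //   Size      = 32 bits
    //   ABIAlign  = 32 / 8 = 4 bytes
    //   PrefAlign = 64 / 8 = 8 bytes
    // A two-field form such as "i32:32" leaves PrefAlign == 0, which the
    // code above then defaults to ABIAlign.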
@@ -227,19 +226,19 @@ void TargetData::init(StringRef Desc) {
///
/// @note This has to exist, because this is a pass, but it should never be
/// used.
-TargetData::TargetData() : ImmutablePass(&ID) {
+TargetData::TargetData() : ImmutablePass(ID) {
report_fatal_error("Bad TargetData ctor used. "
"Tool did not specify a TargetData to use?");
}
TargetData::TargetData(const Module *M)
- : ImmutablePass(&ID) {
+ : ImmutablePass(ID) {
init(M->getDataLayout());
}
void
-TargetData::setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
- unsigned char pref_align, uint32_t bit_width) {
+TargetData::setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+ unsigned pref_align, uint32_t bit_width) {
assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
if (Alignments[i].AlignType == align_type &&
@@ -455,15 +454,6 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
case Type::StructTyID:
// Get the layout annotation... which is lazily created on demand.
return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
- case Type::UnionTyID: {
- const UnionType *UnTy = cast<UnionType>(Ty);
- uint64_t Size = 0;
- for (UnionType::element_iterator i = UnTy->element_begin(),
- e = UnTy->element_end(); i != e; ++i) {
- Size = std::max(Size, getTypeSizeInBits(*i));
- }
- return Size;
- }
case Type::IntegerTyID:
return cast<IntegerType>(Ty)->getBitWidth();
case Type::VoidTyID:
@@ -496,7 +486,7 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const {
Get the ABI (\a abi_or_pref == true) or preferred alignment (\a abi_or_pref
== false) for the requested type \a Ty.
*/
-unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
+unsigned TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
int AlignType = -1;
assert(Ty->isSized() && "Cannot getTypeInfo() on a type that is unsized!");
@@ -518,18 +508,7 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
// Get the layout annotation... which is lazily created on demand.
const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty);
- return std::max(Align, (unsigned)Layout->getAlignment());
- }
- case Type::UnionTyID: {
- const UnionType *UnTy = cast<UnionType>(Ty);
- unsigned Align = 1;
-
- // Unions need the maximum alignment of all their entries
- for (UnionType::element_iterator i = UnTy->element_begin(),
- e = UnTy->element_end(); i != e; ++i) {
- Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref));
- }
- return Align;
+ return std::max(Align, Layout->getAlignment());
}
case Type::IntegerTyID:
case Type::VoidTyID:
@@ -556,18 +535,18 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const {
abi_or_pref, Ty);
}
-unsigned char TargetData::getABITypeAlignment(const Type *Ty) const {
+unsigned TargetData::getABITypeAlignment(const Type *Ty) const {
return getAlignment(Ty, true);
}
/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
/// an integer type of the specified bitwidth.
-unsigned char TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
+unsigned TargetData::getABIIntegerTypeAlignment(unsigned BitWidth) const {
return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
}
-unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
for (unsigned i = 0, e = Alignments.size(); i != e; ++i)
if (Alignments[i].AlignType == STACK_ALIGN)
return Alignments[i].ABIAlign;
@@ -575,12 +554,12 @@ unsigned char TargetData::getCallFrameTypeAlignment(const Type *Ty) const {
return getABITypeAlignment(Ty);
}
-unsigned char TargetData::getPrefTypeAlignment(const Type *Ty) const {
+unsigned TargetData::getPrefTypeAlignment(const Type *Ty) const {
return getAlignment(Ty, false);
}
-unsigned char TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
- unsigned Align = (unsigned) getPrefTypeAlignment(Ty);
+unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
+ unsigned Align = getPrefTypeAlignment(Ty);
assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
return Log2_32(Align);
}
@@ -615,18 +594,13 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
// Update Ty to refer to current element
Ty = STy->getElementType(FieldNo);
- } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) {
- unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue();
-
- // Offset into union is canonically 0, but type changes
- Ty = UnTy->getElementType(FieldNo);
} else {
// Update Ty to refer to current element
Ty = cast<SequentialType>(Ty)->getElementType();
// Get the array index and the size of each array element.
if (int64_t arrayIdx = cast<ConstantInt>(Indices[CurIDX])->getSExtValue())
- Result += arrayIdx * (int64_t)getTypeAllocSize(Ty);
+ Result += (uint64_t)arrayIdx * getTypeAllocSize(Ty);
}
}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 47c91df1400e6..705b1c097e55f 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -30,7 +30,8 @@ namespace llvm {
bool NoFramePointerElimNonLeaf;
bool NoExcessFPPrecision;
bool UnsafeFPMath;
- bool FiniteOnlyFPMathOption;
+ bool NoInfsFPMath;
+ bool NoNaNsFPMath;
bool HonorSignDependentRoundingFPMathOption;
bool UseSoftFloat;
FloatABI::ABIType FloatABIType;
@@ -80,9 +81,14 @@ EnableUnsafeFPMath("enable-unsafe-fp-math",
cl::location(UnsafeFPMath),
cl::init(false));
static cl::opt<bool, true>
-EnableFiniteOnlyFPMath("enable-finite-only-fp-math",
- cl::desc("Enable optimizations that assumes non- NaNs / +-Infs"),
- cl::location(FiniteOnlyFPMathOption),
+EnableNoInfsFPMath("enable-no-infs-fp-math",
+ cl::desc("Enable FP math optimizations that assume no +-Infs"),
+ cl::location(NoInfsFPMath),
+ cl::init(false));
+static cl::opt<bool, true>
+EnableNoNaNsFPMath("enable-no-nans-fp-math",
+ cl::desc("Enable FP math optimizations that assume no NaNs"),
+ cl::location(NoNaNsFPMath),
cl::init(false));
static cl::opt<bool, true>
EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
@@ -290,12 +296,6 @@ namespace llvm {
/// result is "less precise" than doing those operations individually.
bool LessPreciseFPMAD() { return UnsafeFPMath || LessPreciseFPMADOption; }
- /// FiniteOnlyFPMath - This returns true when the -enable-finite-only-fp-math
- /// option is specified on the command line. If this returns false (default),
- /// the code generator is not allowed to assume that FP arithmetic arguments
- /// and results are never NaNs or +-Infs.
- bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; }
-
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
bool HonorSignDependentRoundingFPMath() {
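The old combined query is recoverable as the conjunction of the two new flags. A hedged standalone sketch (variable names mirror the patch; the values below are illustrative, not defaults):

  #include <cstdio>

  bool NoInfsFPMath = true;   // -enable-no-infs-fp-math
  bool NoNaNsFPMath = false;  // -enable-no-nans-fp-math

  // Equivalent of the removed FiniteOnlyFPMath() accessor.
  static bool FiniteOnlyFPMath() { return NoInfsFPMath && NoNaNsFPMath; }

  int main() {
    printf("finite-only: %d\n", FiniteOnlyFPMath()); // 0: NaNs still allowed
  }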
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 49bfad54136d3..55f222c7c1c95 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -63,7 +63,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
/// getAllocatableSetForRC - Toggle the bits that represent allocatable
/// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF,
- const TargetRegisterClass *RC, BitVector &R){
+ const TargetRegisterClass *RC, BitVector &R){
for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
E = RC->allocation_order_end(MF); I != E; ++I)
R.set(*I);
@@ -74,12 +74,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
BitVector Allocatable(NumRegs);
if (RC) {
getAllocatableSetForRC(MF, RC, Allocatable);
- return Allocatable;
+ } else {
+ for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
+ E = regclass_end(); I != E; ++I)
+ getAllocatableSetForRC(MF, *I, Allocatable);
}
- for (TargetRegisterInfo::regclass_iterator I = regclass_begin(),
- E = regclass_end(); I != E; ++I)
- getAllocatableSetForRC(MF, *I, Allocatable);
+ // Mask out the reserved registers
+ BitVector Reserved = getReservedRegs(MF);
+ Allocatable ^= Reserved & Allocatable;
+
return Allocatable;
}
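The "Allocatable ^= Reserved & Allocatable" line clears the reserved bits in place; it is the BitVector spelling of "Allocatable &= ~Reserved" that avoids materializing a flipped copy. A quick equivalence check with std::bitset:

  #include <bitset>
  #include <cassert>

  int main() {
    std::bitset<8> Allocatable("11101110"), Reserved("00000110");
    std::bitset<8> A = Allocatable, B = Allocatable;
    A ^= Reserved & A;  // the idiom used above
    B &= ~Reserved;     // the more familiar spelling
    assert(A == B);     // both yield 11101000
  }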
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f1e66ab9d2c3f..f8588d818b75d 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -9,6 +9,8 @@
#include "llvm/Target/TargetAsmParser.h"
#include "X86.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
@@ -19,6 +21,7 @@
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
@@ -28,6 +31,7 @@ struct X86Operand;
class X86ATTAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
+ TargetMachine &TM;
protected:
unsigned Is64Bit : 1;
@@ -37,8 +41,6 @@ private:
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
-
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
@@ -48,13 +50,14 @@ private:
bool ParseDirectiveWord(unsigned Size, SMLoc L);
- void InstructionCleanup(MCInst &Inst);
+ bool MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+ MCInst &Inst);
- /// @name Auto-generated Match Functions
+ /// @name Auto-generated Matcher Functions
/// {
- bool MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCInst &Inst);
+ unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
bool MatchInstructionImpl(
const SmallVectorImpl<MCParsedAsmOperand*> &Operands, MCInst &Inst);
@@ -62,27 +65,32 @@ private:
/// }
public:
- X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : TargetAsmParser(T), Parser(_Parser) {}
+ X86ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : TargetAsmParser(T), Parser(_Parser), TM(TM) {
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(
+ &TM.getSubtarget<X86Subtarget>()));
+ }
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
};
-
+
class X86_32ATTAsmParser : public X86ATTAsmParser {
public:
- X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : X86ATTAsmParser(T, _Parser) {
+ X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, _Parser, TM) {
Is64Bit = false;
}
};
class X86_64ATTAsmParser : public X86ATTAsmParser {
public:
- X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser)
- : X86ATTAsmParser(T, _Parser) {
+ X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &TM)
+ : X86ATTAsmParser(T, _Parser, TM) {
Is64Bit = true;
}
};
@@ -90,7 +98,7 @@ public:
} // end anonymous namespace
/// @name Auto-generated Match Functions
-/// {
+/// {
static unsigned MatchRegisterName(StringRef Name);
@@ -109,7 +117,7 @@ struct X86Operand : public MCParsedAsmOperand {
} Kind;
SMLoc StartLoc, EndLoc;
-
+
union {
struct {
const char *Data;
@@ -141,6 +149,8 @@ struct X86Operand : public MCParsedAsmOperand {
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
+ virtual void dump(raw_ostream &OS) const {}
+
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
@@ -185,7 +195,7 @@ struct X86Operand : public MCParsedAsmOperand {
bool isToken() const {return Kind == Token; }
bool isImm() const { return Kind == Immediate; }
-
+
bool isImmSExti16i8() const {
if (!isImm())
return false;
@@ -260,10 +270,6 @@ struct X86Operand : public MCParsedAsmOperand {
!getMemIndexReg() && getMemScale() == 1;
}
- bool isNoSegMem() const {
- return Kind == Memory && !getMemSegReg();
- }
-
bool isReg() const { return Kind == Register; }
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
@@ -298,14 +304,6 @@ struct X86Operand : public MCParsedAsmOperand {
Inst.addOperand(MCOperand::CreateExpr(getMemDisp()));
}
- void addNoSegMemOperands(MCInst &Inst, unsigned N) const {
- assert((N == 4) && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
- Inst.addOperand(MCOperand::CreateImm(getMemScale()));
- Inst.addOperand(MCOperand::CreateReg(getMemIndexReg()));
- addExpr(Inst, getMemDisp());
- }
-
static X86Operand *CreateToken(StringRef Str, SMLoc Loc) {
X86Operand *Res = new X86Operand(Token, Loc, Loc);
Res->Tok.Data = Str.data();
@@ -376,13 +374,19 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
// FIXME: Validate register for the current architecture; we have to do
// validation later, so maybe there is no need for this here.
RegNo = MatchRegisterName(Tok.getString());
-
+
+ // FIXME: This should be done using Requires<In32BitMode> and
+ // Requires<In64BitMode> so "eiz" usage in 64-bit instructions
+ // can be also checked.
+ if (RegNo == X86::RIZ && !Is64Bit)
+ return Error(Tok.getLoc(), "riz register in 64-bit mode only");
+
// Parse %st(1) and "%st" as "%st(0)"
if (RegNo == 0 && Tok.getString() == "st") {
RegNo = X86::ST0;
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat 'st'
-
+
// Check to see if we have '(4)' after %st.
if (getLexer().isNot(AsmToken::LParen))
return false;
@@ -403,15 +407,15 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
case 7: RegNo = X86::ST7; break;
default: return Error(IntTok.getLoc(), "invalid stack index");
}
-
+
if (getParser().Lex().isNot(AsmToken::RParen))
return Error(Parser.getTok().getLoc(), "expected ')'");
-
+
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat ')'
return false;
}
-
+
// If this is "db[0-7]", match it as an alias
// for dr[0-7].
if (RegNo == 0 && Tok.getString().size() == 3 &&
@@ -426,14 +430,14 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
case '6': RegNo = X86::DR6; break;
case '7': RegNo = X86::DR7; break;
}
-
+
if (RegNo != 0) {
EndLoc = Tok.getLoc();
Parser.Lex(); // Eat it.
return false;
}
}
-
+
if (RegNo == 0)
return Error(Tok.getLoc(), "invalid register name");
@@ -452,13 +456,17 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
unsigned RegNo;
SMLoc Start, End;
if (ParseRegister(RegNo, Start, End)) return 0;
-
+ if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
+ Error(Start, "eiz and riz can only be used as index registers");
+ return 0;
+ }
+
// If this is a segment register followed by a ':', then this is the start
// of a memory reference, otherwise this is a normal register reference.
if (getLexer().isNot(AsmToken::Colon))
return X86Operand::CreateReg(RegNo, Start, End);
-
-
+
+
getParser().Lex(); // Eat the colon.
return ParseMemOperand(RegNo, Start);
}
@@ -477,7 +485,7 @@ X86Operand *X86ATTAsmParser::ParseOperand() {
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
/// has already been parsed if present.
X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
-
+
// We have to disambiguate a parenthesized expression "(4+5)" from the start
// of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
// only way to do this without lookahead is to eat the '(' and see what is
@@ -486,7 +494,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
if (getParser().ParseExpression(Disp, ExprEnd)) return 0;
-
+
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
@@ -495,7 +503,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
return X86Operand::CreateMem(Disp, MemStart, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
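For reference, a decomposition of the AT&T form named above, segment:disp(base,index,scale), using the made-up operand %gs:8(%ebx,%esi,4) (illustrative only, not from this patch):

  #include <cstdio>

  struct MemOperand {
    const char *SegReg, *BaseReg, *IndexReg;
    long Disp;
    unsigned Scale;  // must be 1, 2, 4 or 8
  };

  int main() {
    MemOperand Op = { "gs", "ebx", "esi", /*Disp=*/8, /*Scale=*/4 };
    printf("%%%s:%ld(%%%s,%%%s,%u)\n", Op.SegReg, Op.Disp,
           Op.BaseReg, Op.IndexReg, Op.Scale); // %gs:8(%ebx,%esi,4)
  }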
-
+
// Eat the '('.
Parser.Lex();
} else {
@@ -503,17 +511,17 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// so we have to eat the ( to see beyond it.
SMLoc LParenLoc = Parser.getTok().getLoc();
Parser.Lex(); // Eat the '('.
-
+
if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
// Nothing to do here, fall into the code below with the '(' part of the
// memory operand consumed.
} else {
SMLoc ExprEnd;
-
+
// It must be a parenthesized expression; parse it now.
if (getParser().ParseParenExpression(Disp, ExprEnd))
return 0;
-
+
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
if (getLexer().isNot(AsmToken::LParen)) {
@@ -522,21 +530,25 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
return X86Operand::CreateMem(Disp, LParenLoc, ExprEnd);
return X86Operand::CreateMem(SegReg, Disp, 0, 0, 1, MemStart, ExprEnd);
}
-
+
// Eat the '('.
Parser.Lex();
}
}
-
+
// If we reached here, then we just ate the ( of the memory operand. Process
// the rest of the memory operand.
unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
-
+
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
if (ParseRegister(BaseReg, L, L)) return 0;
+ if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
+ Error(L, "eiz and riz can only be used as index registers");
+ return 0;
+ }
}
-
+
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
@@ -545,11 +557,11 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// correctly.
//
// Note that even though it would be completely consistent to support syntax
- // like "1(%eax,,1)", the assembler doesn't.
+ // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
if (ParseRegister(IndexReg, L, L)) return 0;
-
+
if (getLexer().isNot(AsmToken::RParen)) {
// Parse the scale amount:
// ::= ',' [scale-expression]
@@ -566,7 +578,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
int64_t ScaleVal;
if (getParser().ParseAbsoluteExpression(ScaleVal))
return 0;
-
+
// Validate the scale amount.
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
@@ -576,19 +588,20 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
}
}
} else if (getLexer().isNot(AsmToken::RParen)) {
- // Otherwise we have the unsupported form of a scale amount without an
+ // A scale amount without an index is ignored.
// index.
SMLoc Loc = Parser.getTok().getLoc();
int64_t Value;
if (getParser().ParseAbsoluteExpression(Value))
return 0;
-
- Error(Loc, "cannot have scale factor without index register");
- return 0;
+
+ if (Value != 1)
+ Warning(Loc, "scale factor without index register is ignored");
+ Scale = 1;
}
}
-
+
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
@@ -596,7 +609,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
}
SMLoc MemEnd = Parser.getTok().getLoc();
Parser.Lex(); // Eat the ')'.
-
+
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
MemStart, MemEnd);
}
@@ -743,6 +756,23 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
}
+
+ // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
+ if (PatchedName.startswith("vpclmul")) {
+ unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
+ PatchedName.slice(7, PatchedName.size() - 2))
+ .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
+ .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
+ .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
+ .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
+ .Default(~0U);
+ if (CLMULQuadWordSelect != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
+ getParser().getContext());
+ assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
+ PatchedName = "vpclmulqdq";
+ }
+ }
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (ExtraImmOp)
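The immediate encodes one quadword selector per source: bit 0 picks the high quadword of src1, bit 4 picks the high quadword of src2. A standalone sketch of the infix decoding, fed the hypothetical infix "hqlq":

  #include <cstdio>
  #include <cstring>

  static unsigned DecodeCLMULSelect(const char *Infix) {
    if (!strcmp(Infix, "lqlq")) return 0x00; // src1[63:0],   src2[63:0]
    if (!strcmp(Infix, "hqlq")) return 0x01; // src1[127:64], src2[63:0]
    if (!strcmp(Infix, "lqhq")) return 0x10; // src1[63:0],   src2[127:64]
    if (!strcmp(Infix, "hqhq")) return 0x11; // src1[127:64], src2[127:64]
    return ~0U;
  }

  int main() {
    // "vpclmulhqlqdq" carries the infix in characters [7, length-2).
    printf("imm = 0x%02x\n", DecodeCLMULSelect("hqlq")); // imm = 0x01
  }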
@@ -785,6 +815,20 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 1);
}
+ // FIXME: Hack to handle "out[bwl]? %al, (%dx)" -> "outb %al, %dx".
+ if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") &&
+ Operands.size() == 3) {
+ X86Operand &Op = *(X86Operand*)Operands.back();
+ if (Op.isMem() && Op.Mem.SegReg == 0 &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
+ SMLoc Loc = Op.getEndLoc();
+ Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
+ delete &Op;
+ }
+ }
+
// FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
// "f{mul*,add*,sub*,div*} $op"
if ((Name.startswith("fmul") || Name.startswith("fadd") ||
@@ -796,6 +840,16 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 2);
}
+ // FIXME: Hack to handle "imul <imm>, B" which is an alias for "imul <imm>, B,
+ // B".
+ if (Name.startswith("imul") && Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[1])->isImm() &&
+ static_cast<X86Operand*>(Operands.back())->isReg()) {
+ X86Operand *Op = static_cast<X86Operand*>(Operands.back());
+ Operands.push_back(X86Operand::CreateReg(Op->getReg(), Op->getStartLoc(),
+ Op->getEndLoc()));
+ }
+
return false;
}
@@ -819,7 +873,7 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().is(AsmToken::EndOfStatement))
break;
-
+
// FIXME: Improve diagnostic.
if (getLexer().isNot(AsmToken::Comma))
return Error(L, "unexpected token in directive");
@@ -831,82 +885,32 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has a
-/// imm operand, to having "rm" or "mr" operands with the offset in the disp
-/// field.
-static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
- bool isMR) {
- MCOperand Disp = Inst.getOperand(0);
-
- // Start over with an empty instruction.
- Inst = MCInst();
- Inst.setOpcode(Opc);
-
- if (!isMR)
- Inst.addOperand(MCOperand::CreateReg(RegNo));
-
- // Add the mem operand.
- Inst.addOperand(MCOperand::CreateReg(0)); // Segment
- Inst.addOperand(MCOperand::CreateImm(1)); // Scale
- Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg
- Inst.addOperand(Disp); // Displacement
- Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg
-
- if (isMR)
- Inst.addOperand(MCOperand::CreateReg(RegNo));
-}
-
-// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
-// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
-// proper mechanism for supporting (ambiguous) feature dependent instructions.
-void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
- if (!Is64Bit) return;
-
- switch (Inst.getOpcode()) {
- case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break;
- case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break;
- case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break;
- case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break;
- case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break;
- case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
- case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
- case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
-
- // moffset instructions are x86-32 only.
- case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
- case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
- case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
- case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
- case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
- case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
- }
-}
bool
-X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
+X86ATTAsmParser::MatchInstruction(SMLoc IDLoc,
+ const SmallVectorImpl<MCParsedAsmOperand*>
&Operands,
MCInst &Inst) {
+ assert(!Operands.empty() && "Unexpected empty operand list!");
+
+ X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
+ assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
// First, try a direct match.
if (!MatchInstructionImpl(Operands, Inst))
return false;
- // Ignore anything which is obviously not a suffix match.
- if (Operands.size() == 0)
- return true;
- X86Operand *Op = static_cast<X86Operand*>(Operands[0]);
- if (!Op->isToken() || Op->getToken().size() > 15)
- return true;
-
// FIXME: Ideally, we would only attempt suffix matches for things which are
// valid prefixes, and we could just infer the right unambiguous
// type. However, that requires substantially more matcher support than the
// following hack.
// Change the operand to point to a temporary token.
- char Tmp[16];
StringRef Base = Op->getToken();
- memcpy(Tmp, Base.data(), Base.size());
- Op->setTokenValue(StringRef(Tmp, Base.size() + 1));
+ SmallString<16> Tmp;
+ Tmp += Base;
+ Tmp += ' ';
+ Op->setTokenValue(Tmp.str());
// Check for the various suffix matches.
Tmp[Base.size()] = 'b';
@@ -928,6 +932,38 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
return false;
// Otherwise, the match failed.
+
+ // If we had multiple suffix matches, then identify this as an ambiguous
+ // match.
+ if (MatchB + MatchW + MatchL + MatchQ != 4) {
+ char MatchChars[4];
+ unsigned NumMatches = 0;
+ if (!MatchB)
+ MatchChars[NumMatches++] = 'b';
+ if (!MatchW)
+ MatchChars[NumMatches++] = 'w';
+ if (!MatchL)
+ MatchChars[NumMatches++] = 'l';
+ if (!MatchQ)
+ MatchChars[NumMatches++] = 'q';
+
+ SmallString<126> Msg;
+ raw_svector_ostream OS(Msg);
+ OS << "ambiguous instructions require an explicit suffix (could be ";
+ for (unsigned i = 0; i != NumMatches; ++i) {
+ if (i != 0)
+ OS << ", ";
+ if (i + 1 == NumMatches)
+ OS << "or ";
+ OS << "'" << Base << MatchChars[i] << "'";
+ }
+ OS << ")";
+ Error(IDLoc, OS.str());
+ } else {
+ // FIXME: We should give nicer diagnostics about the exact failure.
+ Error(IDLoc, "unrecognized instruction");
+ }
+
return true;
}
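The joining logic above produces messages such as: ambiguous instructions require an explicit suffix (could be 'addb', 'addw', 'addl', or 'addq'). A standalone sketch of the comma/"or" joining, with hypothetical surviving candidates:

  #include <iostream>
  #include <string>
  #include <vector>

  int main() {
    std::string Base = "add";  // hypothetical mnemonic
    std::vector<char> MatchChars = {'b', 'w', 'l', 'q'};
    std::string Msg =
        "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0, e = MatchChars.size(); i != e; ++i) {
      if (i != 0) Msg += ", ";
      if (i + 1 == e) Msg += "or ";
      Msg += std::string("'") + Base + MatchChars[i] + "'";
    }
    std::cout << Msg << ")\n";
  }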
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index b70a587ec4e24..033973eeeff93 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -2,8 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_llvm_library(LLVMX86AsmPrinter
X86ATTInstPrinter.cpp
- X86AsmPrinter.cpp
X86IntelInstPrinter.cpp
- X86MCInstLower.cpp
+ X86InstComments.cpp
)
add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index f2cdb5ba55eb0..554b96c96e0e5 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -31,6 +32,10 @@ using namespace llvm;
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
index 3be4bae5bec22..eb986643014c7 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
@@ -56,6 +56,9 @@ public:
void printi128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
+ }
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.cpp b/lib/Target/X86/AsmPrinter/X86InstComments.cpp
new file mode 100644
index 0000000000000..da9d5a3579e5f
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86InstComments.cpp
@@ -0,0 +1,232 @@
+//===-- X86InstComments.cpp - Generate verbose-asm comments for instrs ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstComments.h"
+#include "X86GenInstrNames.inc"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/raw_ostream.h"
+#include "../X86ShuffleDecode.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Top Level Entrypoint
+//===----------------------------------------------------------------------===//
+
+/// EmitAnyX86InstComments - This function decodes x86 instructions and prints
+/// newline terminated strings to the specified string if desired. This
+/// information is shown in disassembly dumps when verbose assembly is enabled.
+void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned)) {
+ // If this is a shuffle operation, the switch should fill in this state.
+ SmallVector<unsigned, 8> ShuffleMask;
+ const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
+
+ switch (MI->getOpcode()) {
+ case X86::INSERTPSrr:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ DecodeINSERTPSMask(MI->getOperand(3).getImm(), ShuffleMask);
+ break;
+
+ case X86::MOVLHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVLHPSMask(2, ShuffleMask);
+ break;
+
+ case X86::MOVHLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodeMOVHLPSMask(2, ShuffleMask);
+ break;
+
+ case X86::PSHUFDri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFDmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFMask(4, MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PSHUFHWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFHWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFHWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+ case X86::PSHUFLWri:
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ // FALL THROUGH.
+ case X86::PSHUFLWmi:
+ DestName = getRegName(MI->getOperand(0).getReg());
+ DecodePSHUFLWMask(MI->getOperand(MI->getNumOperands()-1).getImm(),
+ ShuffleMask);
+ break;
+
+ case X86::PUNPCKHBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKHWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKHDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKHQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKHQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKHMask(2, ShuffleMask);
+ break;
+
+ case X86::PUNPCKLBWrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLBWrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(16, ShuffleMask);
+ break;
+ case X86::PUNPCKLWDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLWDrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(8, ShuffleMask);
+ break;
+ case X86::PUNPCKLDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(4, ShuffleMask);
+ break;
+ case X86::PUNPCKLQDQrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::PUNPCKLQDQrm:
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ DecodePUNPCKLMask(2, ShuffleMask);
+ break;
+
+ case X86::SHUFPDrri:
+ DecodeSHUFPSMask(2, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ break;
+
+ case X86::SHUFPSrri:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::SHUFPSrmi:
+ DecodeSHUFPSMask(4, MI->getOperand(3).getImm(), ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+
+ case X86::UNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPDrm:
+ DecodeUNPCKLPMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKLPSrm:
+ DecodeUNPCKLPMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPDrm:
+ DecodeUNPCKHPMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ case X86::UNPCKHPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::UNPCKHPSrm:
+ DecodeUNPCKHPMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(0).getReg());
+ break;
+ }
+
+
+ // If this was a shuffle operation, print the shuffle mask.
+ if (!ShuffleMask.empty()) {
+ if (DestName == 0) DestName = Src1Name;
+ OS << (DestName ? DestName : "mem") << " = ";
+
+ // If the two sources are the same, canonicalize the input elements to be
+ // from the first src so that we get larger element spans.
+ if (Src1Name == Src2Name) {
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if ((int)ShuffleMask[i] >= 0 && // Not sentinel.
+ ShuffleMask[i] >= e) // From second mask.
+ ShuffleMask[i] -= e;
+ }
+ }
+
+ // The shuffle mask specifies which elements of the src1/src2 fill in the
+ // destination, with a few sentinel values. Loop through and print them
+ // out.
+ for (unsigned i = 0, e = ShuffleMask.size(); i != e; ++i) {
+ if (i != 0)
+ OS << ',';
+ if (ShuffleMask[i] == SM_SentinelZero) {
+ OS << "zero";
+ continue;
+ }
+
+ // Otherwise, it must come from src1 or src2. Print the span of elements
+ // that comes from this src.
+ bool isSrc1 = ShuffleMask[i] < ShuffleMask.size();
+ const char *SrcName = isSrc1 ? Src1Name : Src2Name;
+ OS << (SrcName ? SrcName : "mem") << '[';
+ bool IsFirst = true;
+ while (i != e &&
+ (int)ShuffleMask[i] >= 0 &&
+ (ShuffleMask[i] < ShuffleMask.size()) == isSrc1) {
+ if (!IsFirst)
+ OS << ',';
+ else
+ IsFirst = false;
+ OS << ShuffleMask[i] % ShuffleMask.size();
+ ++i;
+ }
+ OS << ']';
+ --i; // For loop increments element #.
+ }
+ //MI->print(OS, 0);
+ OS << "\n";
+ }
+
+}
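A standalone sketch of the span-printing loop: given the 4-element PUNPCKLDQ-style mask {0,4,1,5} (indices 0-3 from src1, 4-7 from src2; register names are illustrative), it emits one bracketed element span per source:

  #include <cstdio>
  #include <vector>

  int main() {
    std::vector<unsigned> Mask = {0, 4, 1, 5};  // PUNPCKLDQ-style mask
    const char *Src[] = {"xmm0", "xmm1"};
    printf("xmm0 = ");
    for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
      if (i != 0) printf(",");
      bool IsSrc1 = Mask[i] < e;
      printf("%s[", Src[IsSrc1 ? 0 : 1]);
      for (bool First = true; i != e && (Mask[i] < e) == IsSrc1; ++i) {
        printf(First ? "%u" : ",%u", Mask[i] % e);
        First = false;
      }
      printf("]");
      --i;  // the outer loop increments the element number
    }
    printf("\n");  // xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
  }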
diff --git a/lib/Target/X86/AsmPrinter/X86InstComments.h b/lib/Target/X86/AsmPrinter/X86InstComments.h
new file mode 100644
index 0000000000000..6b86db4f9e5c9
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86InstComments.h
@@ -0,0 +1,25 @@
+//===-- X86InstComments.h - Generate verbose-asm comments for instrs ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines functionality used to emit comments about X86 instructions to
+// an output stream for -fverbose-asm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_INST_COMMENTS_H
+#define X86_INST_COMMENTS_H
+
+namespace llvm {
+ class MCInst;
+ class raw_ostream;
+ void EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
+ const char *(*getRegName)(unsigned));
+}
+
+#endif
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
index a632047f6592b..5625b0ea618f8 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
+#include "X86InstComments.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -30,6 +31,10 @@ using namespace llvm;
void X86IntelInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
printInstruction(MI, OS);
+
+ // If verbose assembly is enabled, we can print some informative comments.
+ if (CommentStream)
+ EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
StringRef X86IntelInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
index 4d680744dd60b..6f120322742b2 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
@@ -64,6 +64,10 @@ public:
O << "XMMWORD PTR ";
printMemReference(MI, OpNo, O);
}
+ void printi256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
+ }
void printf32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
O << "DWORD PTR ";
printMemReference(MI, OpNo, O);
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 133482036ce1b..e9399f5c83224 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -18,23 +18,24 @@ tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
set(sources
SSEDomainFix.cpp
X86AsmBackend.cpp
- X86CodeEmitter.cpp
+ X86AsmPrinter.cpp
X86COFFMachineModuleInfo.cpp
+ X86CodeEmitter.cpp
X86ELFWriterInfo.cpp
+ X86FastISel.cpp
X86FloatingPoint.cpp
- X86FloatingPointRegKill.cpp
X86ISelDAGToDAG.cpp
X86ISelLowering.cpp
X86InstrInfo.cpp
X86JITInfo.cpp
X86MCAsmInfo.cpp
X86MCCodeEmitter.cpp
+ X86MCInstLower.cpp
X86RegisterInfo.cpp
+ X86SelectionDAGInfo.cpp
X86Subtarget.cpp
X86TargetMachine.cpp
X86TargetObjectFile.cpp
- X86FastISel.cpp
- X86SelectionDAGInfo.cpp
)
if( CMAKE_CL_64 )
@@ -49,4 +50,3 @@ endif()
add_llvm_target(X86CodeGen ${sources})
-target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/X86/README-FPStack.txt b/lib/Target/X86/README-FPStack.txt
index be28e8b394a42..39efd2dbcf1ad 100644
--- a/lib/Target/X86/README-FPStack.txt
+++ b/lib/Target/X86/README-FPStack.txt
@@ -27,8 +27,8 @@ def FpIADD32m : FpI<(ops RFP:$dst, RFP:$src1, i32mem:$src2), OneArgFPRW,
//===---------------------------------------------------------------------===//
-The FP stackifier needs to be global. Also, it should handle simple permutates
-to reduce number of shuffle instructions, e.g. turning:
+The FP stackifier should handle simple permutations to reduce the number of
+shuffle instructions, e.g. turning:
fld P -> fld Q
fld Q fld P
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index b6aba93f37383..f96b22f1e2042 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -2,8 +2,46 @@
// Random ideas for the X86 backend: SSE-specific stuff.
//===---------------------------------------------------------------------===//
-- Consider eliminating the unaligned SSE load intrinsics, replacing them with
- unaligned LLVM load instructions.
+//===---------------------------------------------------------------------===//
+
+SSE Variable shift can be custom lowered to something like this, which uses a
+small table + unaligned load + shuffle instead of going through memory.
+
+__m128i_shift_right:
+ .byte 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+ .byte -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+
+...
+__m128i shift_right(__m128i value, unsigned long offset) {
+ return _mm_shuffle_epi8(value,
+ _mm_loadu_si128((__m128i *) (___m128i_shift_right + offset)));
+}
+
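A hedged usage sketch of the entry above (PSHUFB requires SSSE3, e.g. -mssse3; names are illustrative). Select-mask bytes drawn from the -1 half of the table have their high bit set, so PSHUFB writes zeros for them:

  #include <emmintrin.h>
  #include <tmmintrin.h>
  #include <stdio.h>

  static const char shift_tbl[32] = {
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 };

  static __m128i shift_right(__m128i value, unsigned long offset) {
    return _mm_shuffle_epi8(value,
             _mm_loadu_si128((const __m128i *)(shift_tbl + offset)));
  }

  int main() {
    char out[16];
    _mm_storeu_si128((__m128i *)out, shift_right(_mm_set1_epi8(0x7f), 4));
    printf("%d %d\n", out[11], out[12]); // 127 0: the top 4 bytes are zero
  }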
+//===---------------------------------------------------------------------===//
+
+SSE has instructions for doing operations on complex numbers; we should
+pattern-match them. Compiling this:
+
+_Complex float f32(_Complex float A, _Complex float B) {
+ return A+B;
+}
+
+into:
+
+_f32:
+ movdqa %xmm0, %xmm2
+ addss %xmm1, %xmm2
+ pshufd $16, %xmm2, %xmm2
+ pshufd $1, %xmm1, %xmm1
+ pshufd $1, %xmm0, %xmm0
+ addss %xmm1, %xmm0
+ pshufd $16, %xmm0, %xmm1
+ movdqa %xmm2, %xmm0
+ unpcklps %xmm1, %xmm0
+ ret
+
+seems silly.
+
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index efc0cd82f23e9..a305ae6ec5505 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1135,13 +1135,6 @@ void test(double *P) {
//===---------------------------------------------------------------------===//
-handling llvm.memory.barrier on pre SSE2 cpus
-
-should generate:
-lock ; mov %esp, %esp
-
-//===---------------------------------------------------------------------===//
-
The generated code on x86 for checking for signed overflow on a multiply the
obvious way is much longer than it needs to be.
@@ -1870,3 +1863,100 @@ The code produced by gcc is 3 bytes shorter. This sort of construct often
shows up with bitfields.
//===---------------------------------------------------------------------===//
+
+Take the following C code:
+int f(int a, int b) { return (unsigned char)a == (unsigned char)b; }
+
+We generate the following IR with clang:
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %tmp = xor i32 %b, %a ; <i32> [#uses=1]
+ %tmp6 = and i32 %tmp, 255 ; <i32> [#uses=1]
+ %cmp = icmp eq i32 %tmp6, 0 ; <i1> [#uses=1]
+ %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+}
+
+And the following x86 code:
+ xorl %esi, %edi
+ testb $-1, %dil
+ sete %al
+ movzbl %al, %eax
+ ret
+
+A cmpb instead of the xorl+testb would be one instruction shorter.
+
+//===---------------------------------------------------------------------===//
+
+Given the following C code:
+int f(int a, int b) { return (signed char)a == (signed char)b; }
+
+We generate the following IR with clang:
+define i32 @f(i32 %a, i32 %b) nounwind readnone {
+entry:
+ %sext = shl i32 %a, 24 ; <i32> [#uses=1]
+ %conv1 = ashr i32 %sext, 24 ; <i32> [#uses=1]
+ %sext6 = shl i32 %b, 24 ; <i32> [#uses=1]
+ %conv4 = ashr i32 %sext6, 24 ; <i32> [#uses=1]
+ %cmp = icmp eq i32 %conv1, %conv4 ; <i1> [#uses=1]
+ %conv5 = zext i1 %cmp to i32 ; <i32> [#uses=1]
+ ret i32 %conv5
+}
+
+And the following x86 code:
+ movsbl %sil, %eax
+ movsbl %dil, %ecx
+ cmpl %eax, %ecx
+ sete %al
+ movzbl %al, %eax
+ ret
+
+
+It should be possible to eliminate the sign extensions.
+
+//===---------------------------------------------------------------------===//
+
+LLVM misses a load+store narrowing opportunity in this code:
+
+%struct.bf = type { i64, i16, i16, i32 }
+
+@bfi = external global %struct.bf* ; <%struct.bf**> [#uses=2]
+
+define void @t1() nounwind ssp {
+entry:
+ %0 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
+ %1 = getelementptr %struct.bf* %0, i64 0, i32 1 ; <i16*> [#uses=1]
+ %2 = bitcast i16* %1 to i32* ; <i32*> [#uses=2]
+ %3 = load i32* %2, align 1 ; <i32> [#uses=1]
+ %4 = and i32 %3, -65537 ; <i32> [#uses=1]
+ store i32 %4, i32* %2, align 1
+ %5 = load %struct.bf** @bfi, align 8 ; <%struct.bf*> [#uses=1]
+ %6 = getelementptr %struct.bf* %5, i64 0, i32 1 ; <i16*> [#uses=1]
+ %7 = bitcast i16* %6 to i32* ; <i32*> [#uses=2]
+ %8 = load i32* %7, align 1 ; <i32> [#uses=1]
+ %9 = and i32 %8, -131073 ; <i32> [#uses=1]
+ store i32 %9, i32* %7, align 1
+ ret void
+}
+
+LLVM currently emits this:
+
+ movq bfi(%rip), %rax
+ andl $-65537, 8(%rax)
+ movq bfi(%rip), %rax
+ andl $-131073, 8(%rax)
+ ret
+
+It could narrow the loads and stores to emit this:
+
+ movq bfi(%rip), %rax
+ andb $-2, 10(%rax)
+ movq bfi(%rip), %rax
+ andb $-3, 10(%rax)
+ ret
+
+The trouble is that there is a TokenFactor between the store and the
+load, making it non-trivial to determine if there's anything between
+the load and the store which would prohibit narrowing.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
index dab070e1febda..13680c592e01b 100644
--- a/lib/Target/X86/SSEDomainFix.cpp
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -115,7 +115,7 @@ class SSEDomainFixPass : public MachineFunctionPass {
unsigned Distance;
public:
- SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+ SSEDomainFixPass() : MachineFunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 677781d3730e2..27e88505150b4 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -49,11 +49,6 @@ FunctionPass *createX86FloatingPointStackifierPass();
/// crossings.
FunctionPass *createSSEDomainFixPass();
-/// createX87FPRegKillInserterPass - This function returns a pass which
-/// inserts FP_REG_KILL instructions where needed.
-///
-FunctionPass *createX87FPRegKillInserterPass();
-
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
/// to the specified MCE object.
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index a53f973c1c431..a19f1acffaca8 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -67,6 +67,8 @@ def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true",
"Enable AVX instructions">;
+def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true",
+ "Enable carry-less multiplication instructions">;
def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true",
"Enable three-operand fused multiple-add">;
def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
@@ -180,8 +182,6 @@ include "X86CallingConv.td"
// Currently the X86 assembly parser only supports ATT syntax.
def ATTAsmParser : AsmParser {
string AsmParserClassName = "ATTAsmParser";
- string AsmParserInstCleanup = "InstructionCleanup";
- string MatchInstructionName = "MatchInstructionImpl";
int Variant = 0;
// Discard comments in assembly strings.
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 2cf65c11f94aa..69dc967f9d88c 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -11,9 +11,11 @@
#include "X86.h"
#include "X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/ELFObjectWriter.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MachObjectWriter.h"
@@ -190,10 +192,6 @@ public:
HasScatteredSymbols = true;
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return 0;
- }
-
bool isVirtualSection(const MCSection &Section) const {
const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section);
return SE.getType() == MCSectionELF::SHT_NOBITS;
@@ -204,12 +202,43 @@ class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_32AsmBackend(const Target &T)
: ELFX86AsmBackend(T) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return new ELFObjectWriter(OS, /*Is64Bit=*/false,
+ /*IsLittleEndian=*/true,
+ /*HasRelocationAddend=*/false);
+ }
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
ELFX86_64AsmBackend(const Target &T)
: ELFX86AsmBackend(T) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return new ELFObjectWriter(OS, /*Is64Bit=*/true,
+ /*IsLittleEndian=*/true,
+ /*HasRelocationAddend=*/true);
+ }
+};
+
+class WindowsX86AsmBackend : public X86AsmBackend {
+ bool Is64Bit;
+public:
+ WindowsX86AsmBackend(const Target &T, bool is64Bit)
+ : X86AsmBackend(T)
+ , Is64Bit(is64Bit) {
+ HasScatteredSymbols = true;
+ }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ return createWinCOFFObjectWriter(OS, Is64Bit);
+ }
+
+ bool isVirtualSection(const MCSection &Section) const {
+ const MCSectionCOFF &SE = static_cast<const MCSectionCOFF&>(Section);
+ return SE.getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
+ }
};
class DarwinX86AsmBackend : public X86AsmBackend {
@@ -290,6 +319,10 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
switch (Triple(TT).getOS()) {
case Triple::Darwin:
return new DarwinX86_32AsmBackend(T);
+ case Triple::MinGW32:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ return new WindowsX86AsmBackend(T, false);
default:
return new ELFX86_32AsmBackend(T);
}
@@ -300,6 +333,10 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
switch (Triple(TT).getOS()) {
case Triple::Darwin:
return new DarwinX86_64AsmBackend(T);
+ case Triple::MinGW64:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ return new WindowsX86AsmBackend(T, true);
default:
return new ELFX86_64AsmBackend(T);
}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index 08e6486d5b7a3..20110ad788cd1 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "X86AsmPrinter.h"
-#include "X86ATTInstPrinter.h"
-#include "X86IntelInstPrinter.h"
+#include "AsmPrinter/X86ATTInstPrinter.h"
+#include "AsmPrinter/X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
#include "X86COFFMachineModuleInfo.h"
@@ -24,6 +24,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -35,6 +36,7 @@
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetOptions.h"
@@ -218,6 +220,10 @@ void X86AsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo,
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
default: llvm_unreachable("Unknown pcrel immediate operand");
+ case MachineOperand::MO_Register:
+ // pc-relativeness was handled when computing the value in the reg.
+ printOperand(MI, OpNo, O);
+ return;
case MachineOperand::MO_Immediate:
O << MO.getImm();
return;
@@ -655,6 +661,47 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
}
+MachineLocation
+X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+
+ if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
+
+void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &O) {
+ // Only the target-dependent form of DBG_VALUE should get here.
+ // Referencing the offset and metadata as NOps-2 and NOps-1 is
+ // probably portable to other targets; frame pointer location is not.
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps==7);
+ O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+ // cast away const; DIetc do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
+ if (V.getContext().isSubprogram())
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
+ O << V.getName();
+ O << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ O << '[';
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ printOperand(MI, 0, O);
+ else
+ O << "undef";
+ O << '+'; printOperand(MI, 3, O);
+ O << ']';
+ O << "+";
+ printOperand(MI, NOps-2, O);
+}
+
+
//===----------------------------------------------------------------------===//
// Target Registry Stuff
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index b5a7f8dc321ad..e61be66c75a2e 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -14,9 +14,9 @@
#ifndef X86ASMPRINTER_H
#define X86ASMPRINTER_H
-#include "../X86.h"
-#include "../X86MachineFunctionInfo.h"
-#include "../X86TargetMachine.h"
+#include "X86.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86TargetMachine.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index a6a1e4e573cff..e3409effc3187 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -33,13 +33,19 @@ def RetCC_X86Common : CallingConv<[
CCIfType<[i16], CCAssignToReg<[AX, DX]>>,
CCIfType<[i32], CCAssignToReg<[EAX, EDX]>>,
CCIfType<[i64], CCAssignToReg<[RAX, RDX]>>,
-
- // Vector types are returned in XMM0 and XMM1, when they fit. XMMM2 and XMM3
+
+ // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
// can only be used by ABI non-compliant code. If the target doesn't have XMM
// registers, it won't have vector types.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
+ // 256-bit vectors are returned in YMM0 and YMM1, when they fit. YMM2 and YMM3
+ // can only be used by ABI non-compliant code. This vector type is only
+ // supported while using the AVX target feature.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>,
+
// MMX vector types are always returned in MM0. If the target doesn't have
// MM0, it doesn't support these vector types.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>,
@@ -164,11 +170,16 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfSubtarget<"hasSSE1()",
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
-
+
+ // The first 8 256-bit vector arguments are passed in YMM registers.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]>>>,
+
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
+
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
CCIfType<[f80], CCAssignToStack<0, 0>>,
@@ -176,6 +187,10 @@ def CC_X86_64_C : CallingConv<[
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
@@ -271,9 +286,18 @@ def CC_X86_32_Common : CallingConv<[
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
+ // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
+ CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()",
+ CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
+
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
+ // 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
// passed in the parameter area.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 4>>]>;
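A hedged illustration of the rules added above (AVX hardware assumed, e.g. -mavx): under the x86-64 convention the two 256-bit arguments arrive in %ymm0 and %ymm1 and the sum comes back in %ymm0; on x86-32 the first four such arguments get YMM registers as well:

  #include <immintrin.h>

  __m256 add8f(__m256 a, __m256 b) {  // a in %ymm0, b in %ymm1
    return _mm256_add_ps(a, b);       // result returned in %ymm0
  }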
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f13669bd741d1..824021c0c882b 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -53,12 +53,12 @@ namespace {
public:
static char ID;
explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
- : MachineFunctionPass(&ID), II(0), TD(0), TM(tm),
+ : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(false),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
Emitter(X86TargetMachine &tm, CodeEmitter &mce,
const X86InstrInfo &ii, const TargetData &td, bool is64)
- : MachineFunctionPass(&ID), II(&ii), TD(&td), TM(tm),
+ : MachineFunctionPass(ID), II(&ii), TD(&td), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(is64),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
@@ -146,6 +146,103 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
return false;
}
+/// determineREX - Determine if the MachineInstr has to be encoded with an
+/// X86-64 REX prefix, which specifies 1) 64-bit instructions, 2) non-default
+/// operand size, and 3) use of X86-64 extended registers.
+static unsigned determineREX(const MachineInstr &MI) {
+ unsigned REX = 0;
+ const TargetInstrDesc &Desc = MI.getDesc();
+
+ // Pseudo instructions do not need REX prefix byte.
+ if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return 0;
+ if (Desc.TSFlags & X86II::REX_W)
+ REX |= 1 << 3;
+
+ unsigned NumOps = Desc.getNumOperands();
+ if (NumOps) {
+ bool isTwoAddr = NumOps > 1 &&
+ Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
+
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
+ unsigned i = isTwoAddr ? 1 : 0;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ if (X86InstrInfo::isX86_64NonExtLowByteReg(Reg))
+ REX |= 0x40;
+ }
+ }
+
+ switch (Desc.TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg:
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= (1 << 0) | (1 << 2);
+ break;
+ case X86II::MRMSrcReg: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 0;
+ }
+ break;
+ }
+ case X86II::MRMSrcMem: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ i = isTwoAddr ? 2 : 1;
+ for (; i != NumOps; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem: {
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
+ i = isTwoAddr ? 1 : 0;
+ if (NumOps > e && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e)))
+ REX |= 1 << 2;
+ unsigned Bit = 0;
+ for (; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << Bit;
+ Bit++;
+ }
+ }
+ break;
+ }
+ default: {
+ if (X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0)))
+ REX |= 1 << 0;
+ i = isTwoAddr ? 2 : 1;
+ for (unsigned e = NumOps; i != e; ++i) {
+ const MachineOperand& MO = MI.getOperand(i);
+ if (X86InstrInfo::isX86_64ExtendedReg(MO))
+ REX |= 1 << 2;
+ }
+ break;
+ }
+ }
+ }
+ return REX;
+}
+
+
/// emitPCRelativeBlockAddress - This method keeps track of the information
/// necessary to resolve the address of this block later and emits a dummy
/// value.
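
For reference, a standalone sketch (the helper name below is illustrative, not from the patch) of the nibble determineREX computes. The emitter later ORs it with 0x40, giving the byte 0100WRXB, where W selects 64-bit operand size, R extends ModRM.reg, X extends SIB.index, and B extends ModRM.rm or the base register:

#include <cstdint>

static uint8_t rexPrefix(bool W, bool R, bool X, bool B) {
  // 0100 W R X B; the fixed high nibble marks this byte as a REX prefix.
  return static_cast<uint8_t>(0x40 | (W << 3) | (R << 2) | (X << 1) | B);
}
// rexPrefix(true, false, false, true) == 0x49: a 64-bit operation whose
// rm operand is one of the extended registers r8-r15.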
@@ -569,7 +666,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
// Handle REX prefix.
if (Is64BitMode) {
- if (unsigned REX = X86InstrInfo::determineREX(MI))
+ if (unsigned REX = determineREX(MI))
MCE.emitByte(0x40 | REX);
}
@@ -605,24 +702,29 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
// base address.
switch (Opcode) {
default:
- llvm_unreachable("psuedo instructions should be removed before code"
+ llvm_unreachable("pseudo instructions should be removed before code"
" emission");
break;
+ // Do nothing for Int_MemBarrier - it's just a comment. Add a debug
+ // message to make it slightly easier to see.
+ case X86::Int_MemBarrier:
+ DEBUG(dbgs() << "#MEMBARRIER\n");
+ break;
+
case TargetOpcode::INLINEASM:
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
if (MI.getOperand(0).getSymbolName()[0])
report_fatal_error("JIT does not support inline asm!");
break;
- case TargetOpcode::DBG_LABEL:
+ case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::GC_LABEL:
case TargetOpcode::EH_LABEL:
MCE.emitLabel(MI.getOperand(0).getMCSymbol());
break;
-
+
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- case X86::FP_REG_KILL:
break;
case X86::MOVPC32r: {
// This emits the "call" portion of this pseudo instruction.
@@ -674,7 +776,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
}
assert(MO.isImm() && "Unknown RawFrm operand!");
- if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32) {
+ if (Opcode == X86::CALLpcrel32 || Opcode == X86::CALL64pcrel32 ||
+ Opcode == X86::WINCALL64pcrel32) {
// Fix up immediate operand for pc relative calls.
intptr_t Imm = (intptr_t)MO.getImm();
Imm = Imm - MCE.getCurrentPCValue() - 4;
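
The -4 in the fixup above exists because a CALL rel32 displacement is relative to the end of the 4-byte immediate field, i.e. to the next instruction. A minimal sketch, with a hypothetical helper:

#include <cstdint>

static int32_t pcRelDisp32(intptr_t Target, intptr_t ImmFieldStart) {
  // The CPU adds the displacement to the address of the following
  // instruction, which begins 4 bytes after the displacement field starts.
  return static_cast<int32_t>(Target - ImmFieldStart - 4);
}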
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ce1370763b77f..0c70eec4827fb 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -960,9 +960,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- // Fold the common case of a conditional branch with a comparison.
+ // Fold the common case of a conditional branch with a comparison
+ // in the same block (values defined in other blocks may not have
+ // initialized registers).
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- if (CI->hasOneUse()) {
+ if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
// Try to take advantage of fallthrough opportunities.
@@ -1058,10 +1060,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
const MachineInstr &MI = *RI;
if (MI.definesRegister(Reg)) {
- unsigned Src, Dst, SrcSR, DstSR;
-
- if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
- Reg = Src;
+ if (MI.isCopy()) {
+ Reg = MI.getOperand(1).getReg();
continue;
}
@@ -1648,15 +1648,26 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
MachineInstrBuilder MIB;
if (CalleeOp) {
// Register-indirect call.
- unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64r;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64r;
+ else
+ CallOpc = X86::CALL32r;
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
.addReg(CalleeOp);
} else {
// Direct call.
assert(GV && "Not a direct call");
- unsigned CallOpc =
- Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;
+ unsigned CallOpc;
+ if (Subtarget->isTargetWin64())
+ CallOpc = X86::WINCALL64pcrel32;
+ else if (Subtarget->is64Bit())
+ CallOpc = X86::CALL64pcrel32;
+ else
+ CallOpc = X86::CALLpcrel32;
// See if we need any target-specific flags on the GV operand.
unsigned char OpFlags = 0;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index cee4ad70201a7..e6ebf669587dd 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -8,23 +8,18 @@
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
-// virtual registers into register stack instructions. This pass uses live
+// pseudo registers into register stack instructions. This pass uses live
// variable information to indicate where the FPn registers are used and their
// lifetimes.
//
-// This pass is hampered by the lack of decent CFG manipulation routines for
-// machine code. In particular, this wants to be able to split critical edges
-// as necessary, traverse the machine basic block CFG in depth-first order, and
-// allow there to be multiple machine basic blocks for each LLVM basicblock
-// (needed for critical edge splitting).
+// The x87 hardware tracks liveness of the stack registers, so it is necessary
+// to implement exact liveness tracking between basic blocks. The CFG edges are
+// partitioned into bundles where the same FP registers must be live in
+// identical stack positions. Instructions are inserted at the end of each basic
+// block to rearrange the live registers to match the outgoing bundle.
//
-// In particular, this pass currently barfs on critical edges. Because of this,
-// it requires the instruction selector to insert FP_REG_KILL instructions on
-// the exits of any basic block that has critical edges going from it, or which
-// branch to a critical basic block.
-//
-// FIXME: this is not implemented yet. The stackifier pass only works on local
-// basic blocks.
+// This approach avoids splitting critical edges at the potential cost of more
+// live register shuffling instructions when critical edges are present.
//
//===----------------------------------------------------------------------===//
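
A small worked example of the bundle relation described above (illustrative only): in the CFG below, A->C is a critical edge. A->B and A->C leave the same block, and A->C and B->C enter the same block, so the transitive closure puts all three edges into a single LiveBundle; every edge must then agree on which FP registers are live and in which stack positions.

//      A
//     / \
//    B   |     edges: A->B, A->C (critical), B->C
//     \ /
//      C       transitive closure => one LiveBundle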
@@ -32,6 +27,7 @@
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -54,7 +50,12 @@ STATISTIC(NumFP , "Number of floating point instructions");
namespace {
struct FPS : public MachineFunctionPass {
static char ID;
- FPS() : MachineFunctionPass(&ID) {}
+ FPS() : MachineFunctionPass(ID) {
+ // This is really only to keep valgrind quiet.
+ // The logic in isLive() is too much for it.
+ memset(Stack, 0, sizeof(Stack));
+ memset(RegMap, 0, sizeof(RegMap));
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -69,11 +70,71 @@ namespace {
private:
const TargetInstrInfo *TII; // Machine instruction info.
+
+ // Two CFG edges are related if they leave the same block, or enter the same
+ // block. The transitive closure of an edge under this relation is a
+ // LiveBundle. It represents a set of CFG edges where the live FP stack
+ // registers must be allocated identically in the x87 stack.
+ //
+ // A LiveBundle is usually all the edges leaving a block, or all the edges
+ // entering a block, but it can contain more edges if critical edges are
+ // present.
+ //
+ // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
+ // but the exact mapping of FP registers to stack slots is fixed later.
+ struct LiveBundle {
+ // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
+ unsigned Mask;
+
+ // Number of pre-assigned live registers in FixStack. This is 0 when the
+ // stack order has not yet been fixed.
+ unsigned FixCount;
+
+ // Assigned stack order for live-in registers.
+ // FixStack[i] == getStackEntry(i) for all i < FixCount.
+ unsigned char FixStack[8];
+
+ LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}
+
+ // Have the live registers been assigned a stack order yet?
+ bool isFixed() const { return !Mask || FixCount; }
+ };
+
+ // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
+ // with no live FP registers.
+ SmallVector<LiveBundle, 8> LiveBundles;
+
+ // Map each MBB in the current function to an (ingoing, outgoing) index into
+ // LiveBundles. Blocks with no FP registers live in or out map to (0, 0)
+ // and are not actually stored in the map.
+ DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle;
+
+ // Return a bitmask of FP registers in block's live-in list.
+ unsigned calcLiveInMask(MachineBasicBlock *MBB) {
+ unsigned Mask = 0;
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I - X86::FP0;
+ if (Reg < 8)
+ Mask |= 1 << Reg;
+ }
+ return Mask;
+ }
+
+ // Partition all the CFG edges into LiveBundles.
+ void bundleCFG(MachineFunction &MF);
+
MachineBasicBlock *MBB; // Current basic block
unsigned Stack[8]; // FP<n> Registers in each stack slot...
unsigned RegMap[8]; // Track which stack slot contains each register
unsigned StackTop; // The current top of the FP stack.
+ // Set up our stack model to match the incoming registers to MBB.
+ void setupBlockStack();
+
+ // Shuffle live registers to match the expectations of successor blocks.
+ void finishBlockStack();
+
void dumpStack() const {
dbgs() << "Stack contents:";
for (unsigned i = 0; i != StackTop; ++i) {
@@ -82,27 +143,36 @@ namespace {
}
dbgs() << "\n";
}
- private:
- /// isStackEmpty - Return true if the FP stack is empty.
- bool isStackEmpty() const {
- return StackTop == 0;
- }
-
- // getSlot - Return the stack slot number a particular register number is
- // in.
+
+ /// getSlot - Return the stack slot number a particular register number is
+ /// in.
unsigned getSlot(unsigned RegNo) const {
assert(RegNo < 8 && "Regno out of range!");
return RegMap[RegNo];
}
- // getStackEntry - Return the X86::FP<n> register in register ST(i).
+ /// isLive - Is RegNo currently live in the stack?
+ bool isLive(unsigned RegNo) const {
+ unsigned Slot = getSlot(RegNo);
+ return Slot < StackTop && Stack[Slot] == RegNo;
+ }
+
+ /// getScratchReg - Return an FP register that is not currently in use.
+ unsigned getScratchReg() {
+ for (int i = 7; i >= 0; --i)
+ if (!isLive(i))
+ return i;
+ llvm_unreachable("Ran out of scratch FP registers");
+ }
+
+ /// getStackEntry - Return the X86::FP<n> register in register ST(i).
unsigned getStackEntry(unsigned STi) const {
assert(STi < StackTop && "Access past stack top!");
return Stack[StackTop-1-STi];
}
- // getSTReg - Return the X86::ST(i) register which contains the specified
- // FP<RegNo> register.
+ /// getSTReg - Return the X86::ST(i) register which contains the specified
+ /// FP<RegNo> register.
unsigned getSTReg(unsigned RegNo) const {
return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
}
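
The Stack/RegMap pair accessed above maintains a simple inverse-map invariant: Stack lists FP registers from stack bottom to top, RegMap maps each register back to its slot, and st(i) names Stack[StackTop-1-i]. A hypothetical checker, for illustration only:

static bool checkStackModel(const unsigned Stack[8], const unsigned RegMap[8],
                            unsigned StackTop) {
  for (unsigned i = 0; i != StackTop; ++i)
    if (RegMap[Stack[i]] != i)   // every live register must know its own slot
      return false;
  return true;
}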
@@ -117,10 +187,9 @@ namespace {
bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
if (isAtTop(RegNo)) return;
-
+
unsigned STReg = getSTReg(RegNo);
unsigned RegOnTop = getStackEntry(0);
@@ -137,24 +206,37 @@ namespace {
}
void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
- DebugLoc dl = I->getDebugLoc();
+ DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
unsigned STReg = getSTReg(RegNo);
pushReg(AsReg); // New register on top of stack
BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
}
- // popStackAfter - Pop the current value off of the top of the FP stack
- // after the specified instruction.
+ /// popStackAfter - Pop the current value off of the top of the FP stack
+ /// after the specified instruction.
void popStackAfter(MachineBasicBlock::iterator &I);
- // freeStackSlotAfter - Free the specified register from the register stack,
- // so that it is no longer in a register. If the register is currently at
- // the top of the stack, we just pop the current instruction, otherwise we
- // store the current top-of-stack into the specified slot, then pop the top
- // of stack.
+ /// freeStackSlotAfter - Free the specified register from the register
+ /// stack, so that it is no longer in a register. If the register is
+ /// currently at the top of the stack, we just pop the current instruction,
+ /// otherwise we store the current top-of-stack into the specified slot,
+ /// then pop the top of stack.
void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);
+ /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
+ /// instruction.
+ MachineBasicBlock::iterator
+ freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);
+
+ /// Adjust the live registers to be the set in Mask.
+ void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
+
+ /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
+ /// st(0), FP reg FixStack[1] is st(1) etc.
+ void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
+ MachineBasicBlock::iterator I);
+
bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);
void handleZeroArgFP(MachineBasicBlock::iterator &I);
@@ -181,7 +263,6 @@ static unsigned getFPReg(const MachineOperand &MO) {
return Reg - X86::FP0;
}
-
/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
/// register references into FP stack references.
///
@@ -201,6 +282,10 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
if (!FPIsUsed) return false;
TII = MF.getTarget().getInstrInfo();
+
+ // Prepare cross-MBB liveness.
+ bundleCFG(MF);
+
StackTop = 0;
// Process the function in depth first order so that we process at least one
@@ -215,16 +300,111 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
Changed |= processBasicBlock(MF, **I);
// Process any unreachable blocks in arbitrary order now.
- if (MF.size() == Processed.size())
- return Changed;
+ if (MF.size() != Processed.size())
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ if (Processed.insert(BB))
+ Changed |= processBasicBlock(MF, *BB);
+
+ BlockBundle.clear();
+ LiveBundles.clear();
- for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
- if (Processed.insert(BB))
- Changed |= processBasicBlock(MF, *BB);
-
return Changed;
}
+/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
+/// live-out sets for the FP registers. Consistent means that the set of
+/// registers live-out from a block is identical to the live-in set of all
+/// successors. This is not enforced by the normal live-in lists since
+/// registers may be implicitly defined, or not used by all successors.
+void FPS::bundleCFG(MachineFunction &MF) {
+ assert(LiveBundles.empty() && "Stale data in LiveBundles");
+ assert(BlockBundle.empty() && "Stale data in BlockBundle");
+ SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp;
+
+ // LiveBundles[0] is the empty live-in set.
+ LiveBundles.resize(1);
+
+ // First gather the actual live-in masks for all MBBs.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ const unsigned Mask = calcLiveInMask(MBB);
+ if (!Mask)
+ continue;
+ // Ingoing bundle index.
+ unsigned &Idx = BlockBundle[MBB].first;
+ // Already assigned an ingoing bundle?
+ if (Idx)
+ continue;
+ // Allocate a new LiveBundle struct for this block's live-ins.
+ const unsigned BundleIdx = Idx = LiveBundles.size();
+ DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#"
+ << MBB->getNumber());
+ LiveBundles.push_back(Mask);
+ LiveBundle &Bundle = LiveBundles.back();
+
+ // Make sure all predecessors have the same live-out set.
+ PropUp.insert(MBB);
+
+ // Keep pushing liveness up and down the CFG until convergence.
+ // Only critical edges cause iteration here, but when they do, multiple
+ // blocks can be assigned to the same LiveBundle index.
+ do {
+ // Assign BundleIdx as liveout from predecessors in PropUp.
+ for (SmallPtrSet<MachineBasicBlock*, 8>::iterator I = PropUp.begin(),
+ E = PropUp.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = *I;
+ for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(),
+ LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) {
+ MachineBasicBlock *PredMBB = *LinkI;
+ // PredMBB's liveout bundle should be set to BundleIdx.
+ unsigned &Idx = BlockBundle[PredMBB].second;
+ if (Idx) {
+ assert(Idx == BundleIdx && "Inconsistent CFG");
+ continue;
+ }
+ Idx = BundleIdx;
+ DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber());
+ // Propagate to siblings.
+ if (PredMBB->succ_size() > 1)
+ PropDown.insert(PredMBB);
+ }
+ }
+ PropUp.clear();
+
+ // Assign BundleIdx as livein to successors in PropDown.
+ for (SmallPtrSet<MachineBasicBlock*, 8>::iterator I = PropDown.begin(),
+ E = PropDown.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = *I;
+ for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(),
+ LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) {
+ MachineBasicBlock *SuccMBB = *LinkI;
+ // SuccMBB's livein bundle should be set to BundleIdx.
+ unsigned &Idx = BlockBundle[SuccMBB].first;
+ if (Idx) {
+ assert(Idx == BundleIdx && "Inconsistent CFG");
+ continue;
+ }
+ Idx = BundleIdx;
+ DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber());
+ // Propagate to siblings.
+ if (SuccMBB->pred_size() > 1)
+ PropUp.insert(SuccMBB);
+ // Also accumulate the bundle liveness mask from the liveins here.
+ Bundle.Mask |= calcLiveInMask(SuccMBB);
+ }
+ }
+ PropDown.clear();
+ } while (!PropUp.empty());
+ DEBUG({
+ dbgs() << " live:";
+ for (unsigned i = 0; i < 8; ++i)
+ if (Bundle.Mask & (1<<i))
+ dbgs() << " %FP" << i;
+ dbgs() << '\n';
+ });
+ }
+}
+
/// processBasicBlock - Loop over all of the instructions in the basic block,
/// transforming FP instructions into their stack form.
///
@@ -232,10 +412,12 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
bool Changed = false;
MBB = &BB;
+ setupBlockStack();
+
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
uint64_t Flags = MI->getDesc().TSFlags;
-
+
unsigned FPInstClass = Flags & X86II::FPTypeMask;
if (MI->isInlineAsm())
FPInstClass = X86II::SpecialFP;
@@ -302,10 +484,82 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
Changed = true;
}
- assert(isStackEmpty() && "Stack not empty at end of basic block?");
+ finishBlockStack();
+
return Changed;
}
+/// setupBlockStack - Use the BlockBundle map to set up our model of the stack
+/// to match predecessors' live out stack.
+void FPS::setupBlockStack() {
+ DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+ StackTop = 0;
+ const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first];
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "Block has no FP live-ins.\n");
+ return;
+ }
+
+ // Depth-first iteration should ensure that we always have an assigned stack.
+ assert(Bundle.isFixed() && "Reached block before any predecessors");
+
+ // Push the fixed live-in registers.
+ for (unsigned i = Bundle.FixCount; i > 0; --i) {
+ MBB->addLiveIn(X86::ST0+i-1);
+ DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
+ << unsigned(Bundle.FixStack[i-1]) << '\n');
+ pushReg(Bundle.FixStack[i-1]);
+ }
+
+ // Kill off unwanted live-ins. This can happen with a critical edge.
+ // FIXME: We could keep these live registers around as zombies. They may need
+ // to be revived at the end of a short block. It might save a few instrs.
+ adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
+ DEBUG(MBB->dump());
+}
+
+/// finishBlockStack - Revive live-outs that are implicitly defined out of
+/// MBB. Shuffle live registers to match the fixed stack expected by the
+/// bundle (i.e. by the other predecessors of our successors), and ensure
+/// that they all expect the same stack.
+void FPS::finishBlockStack() {
+ // The RET handling below takes care of return blocks for us.
+ if (MBB->succ_empty())
+ return;
+
+ DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
+ << " derived from " << MBB->getName() << ".\n");
+
+ unsigned BundleIdx = BlockBundle.lookup(MBB).second;
+ LiveBundle &Bundle = LiveBundles[BundleIdx];
+
+ // We may need to kill and define some registers to match successors.
+ // FIXME: This can probably be combined with the shuffle below.
+ MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
+ adjustLiveRegs(Bundle.Mask, Term);
+
+ if (!Bundle.Mask) {
+ DEBUG(dbgs() << "No live-outs.\n");
+ return;
+ }
+
+ // Has the stack order been fixed yet?
+ DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
+ if (Bundle.isFixed()) {
+ DEBUG(dbgs() << "Shuffling stack to match.\n");
+ shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
+ } else {
+ // Not fixed yet, we get to choose.
+ DEBUG(dbgs() << "Fixing stack order now.\n");
+ Bundle.FixCount = StackTop;
+ for (unsigned i = 0; i < StackTop; ++i)
+ Bundle.FixStack[i] = getStackEntry(i);
+ }
+}
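
A short trace of the protocol above, with assumed register numbers: say block A finishes first with %FP0 and %FP2 live-out into bundle LB#1; finishBlockStack fixes the order, recording FixStack = {2, 0} so that st(0)=%FP2 and st(1)=%FP0. When a sibling block later finishes into LB#1 with the opposite order, shuffleStackTop emits fxch instructions until it matches; and when a successor starts, setupBlockStack simply replays the fixed order into its stack model, typically without emitting anything.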
+
+
//===----------------------------------------------------------------------===//
// Efficient Lookup Table Support
//===----------------------------------------------------------------------===//
@@ -318,7 +572,7 @@ namespace {
friend bool operator<(const TableEntry &TE, unsigned V) {
return TE.from < V;
}
- friend bool operator<(unsigned V, const TableEntry &TE) {
+ friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) {
return V < TE.from;
}
};
@@ -597,6 +851,13 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
// Otherwise, store the top of stack into the dead slot, killing the operand
// without having to add in an explicit xchg then pop.
//
+ I = freeStackSlotBefore(++I, FPRegNo);
+}
+
+/// freeStackSlotBefore - Free the specified register without trying any
+/// folding.
+MachineBasicBlock::iterator
+FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) {
unsigned STReg = getSTReg(FPRegNo);
unsigned OldSlot = getSlot(FPRegNo);
unsigned TopReg = Stack[StackTop-1];
@@ -604,9 +865,90 @@ void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) {
RegMap[TopReg] = OldSlot;
RegMap[FPRegNo] = ~0;
Stack[--StackTop] = ~0;
- MachineInstr *MI = I;
- DebugLoc dl = MI->getDebugLoc();
- I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(STReg);
+ return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg);
+}
+
+/// adjustLiveRegs - Kill and revive registers such that exactly the FP
+/// registers with a bit in Mask are live.
+void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
+ unsigned Defs = Mask;
+ unsigned Kills = 0;
+ for (unsigned i = 0; i < StackTop; ++i) {
+ unsigned RegNo = Stack[i];
+ if (!(Defs & (1 << RegNo)))
+ // This register is live, but we don't want it.
+ Kills |= (1 << RegNo);
+ else
+ // We don't need to imp-def this live register.
+ Defs &= ~(1 << RegNo);
+ }
+ assert((Kills & Defs) == 0 && "Register needs killing and def'ing?");
+
+ // Produce implicit-defs for free by using killed registers.
+ while (Kills && Defs) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n");
+ std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
+ std::swap(RegMap[KReg], RegMap[DReg]);
+ Kills &= ~(1 << KReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Kill registers by popping.
+ if (Kills && I != MBB->begin()) {
+ MachineBasicBlock::iterator I2 = llvm::prior(I);
+ for (;;) {
+ unsigned KReg = getStackEntry(0);
+ if (!(Kills & (1 << KReg)))
+ break;
+ DEBUG(dbgs() << "Popping %FP" << KReg << "\n");
+ popStackAfter(I2);
+ Kills &= ~(1 << KReg);
+ }
+ }
+
+ // Manually kill the rest.
+ while (Kills) {
+ unsigned KReg = CountTrailingZeros_32(Kills);
+ DEBUG(dbgs() << "Killing %FP" << KReg << "\n");
+ freeStackSlotBefore(I, KReg);
+ Kills &= ~(1 << KReg);
+ }
+
+ // Load zeros for all the imp-defs.
+ while(Defs) {
+ unsigned DReg = CountTrailingZeros_32(Defs);
+ DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n");
+ BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
+ pushReg(DReg);
+ Defs &= ~(1 << DReg);
+ }
+
+ // Now we should have the correct registers live.
+ DEBUG(dumpStack());
+ assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch");
+}
+
+/// shuffleStackTop - emit fxch instructions before I to shuffle the top
+/// FixCount entries into the order given by FixStack.
+/// FIXME: Is there a better algorithm than insertion sort?
+void FPS::shuffleStackTop(const unsigned char *FixStack,
+ unsigned FixCount,
+ MachineBasicBlock::iterator I) {
+ // Move items into place, starting from the desired stack bottom.
+ while (FixCount--) {
+ // Old register at position FixCount.
+ unsigned OldReg = getStackEntry(FixCount);
+ // Desired register at position FixCount.
+ unsigned Reg = FixStack[FixCount];
+ if (Reg == OldReg)
+ continue;
+ // (Reg st0) (OldReg st0) = (Reg OldReg st0)
+ moveToTop(Reg, I);
+ moveToTop(OldReg, I);
+ }
+ DEBUG(dumpStack());
}
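
A hypothetical trace of adjustLiveRegs, for illustration: if the stack holds {%FP0, %FP3} live but Mask asks for {%FP1, %FP3}, then %FP0 needs killing and %FP1 needs defining. Rather than emitting a pop followed by a load of zero, the renaming loop above swaps the Stack and RegMap entries for FP0 and FP1, satisfying both requirements with no instructions at all; pops and LD_F0 loads are emitted only for whatever kills and defs remain unpaired.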
@@ -660,7 +1002,7 @@ void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MI->getOpcode() == X86::ISTT_Fp32m80 ||
MI->getOpcode() == X86::ISTT_Fp64m80 ||
MI->getOpcode() == X86::ST_FpP80m)) {
- duplicateToTop(Reg, 7 /*temp register*/, I);
+ duplicateToTop(Reg, getScratchReg(), I);
} else {
moveToTop(Reg, I); // Move to the top of the stack...
}
@@ -1013,8 +1355,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
if (!MI->killsRegister(X86::FP0 + Op0)) {
// Duplicate Op0 into a temporary on the stack top.
- // This actually assumes that FP7 is dead.
- duplicateToTop(Op0, 7, I);
+ duplicateToTop(Op0, getScratchReg(), I);
} else {
// Op0 is killed, so just swap it into position.
moveToTop(Op0, I);
@@ -1034,8 +1375,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
++StackTop;
unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
if (!MI->killsRegister(X86::FP0 + Op0)) {
- // Assume FP6 is not live, use it as a scratch register.
- duplicateToTop(Op0, 6, I);
+ duplicateToTop(Op0, getScratchReg(), I);
moveToTop(RegOnTop, I);
} else if (getSTReg(Op0) != X86::ST1) {
// We have the wrong value at st(1). Shuffle! Untested!
@@ -1119,11 +1459,11 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
case X86::RETI:
// If RET has an FP register use operand, pass the first one in ST(0) and
// the second one in ST(1).
- if (isStackEmpty()) return; // Quick check to see if any are possible.
-
+
// Find the register operands.
unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
-
+ unsigned LiveMask = 0;
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
@@ -1142,12 +1482,18 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
assert(SecondFPRegOp == ~0U && "More than two fp operands!");
SecondFPRegOp = getFPReg(Op);
}
+ LiveMask |= (1 << getFPReg(Op));
// Remove the operand so that later passes don't see it.
MI->RemoveOperand(i);
--i, --e;
}
-
+
+ // We may have been carrying spurious live-ins, so make sure only the returned
+ // registers are left live.
+ adjustLiveRegs(LiveMask, MI);
+ if (!LiveMask) return; // Quick check to see if any are possible.
+
// There are only four possibilities here:
// 1) we are returning a single FP value. In this case, it has to be in
// ST(0) already, so just declare success by removing the value from the
@@ -1173,7 +1519,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// Duplicate the TOS so that we return it twice. Just pick some other FPx
// register to hold it.
- unsigned NewReg = (FirstFPRegOp+1)%7;
+ unsigned NewReg = getScratchReg();
duplicateToTop(FirstFPRegOp, NewReg, MI);
FirstFPRegOp = NewReg;
}
@@ -1197,7 +1543,14 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
}
I = MBB->erase(I); // Remove the pseudo instruction
- --I;
+
+ // We want to leave I pointing to the previous instruction, but what if we
+ // just erased the first instruction?
+ if (I == MBB->begin()) {
+ DEBUG(dbgs() << "Inserting dummy KILL\n");
+ I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
+ } else
+ --I;
}
// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
deleted file mode 100644
index 2c98b96c510b9..0000000000000
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ /dev/null
@@ -1,153 +0,0 @@
-//===-- X86FloatingPoint.cpp - FP_REG_KILL inserter -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the pass which inserts FP_REG_KILL instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "x86-codegen"
-#include "X86.h"
-#include "X86InstrInfo.h"
-#include "llvm/Instructions.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumFPKill, "Number of FP_REG_KILL instructions added");
-
-namespace {
- struct FPRegKiller : public MachineFunctionPass {
- static char ID;
- FPRegKiller() : MachineFunctionPass(&ID) {}
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "X86 FP_REG_KILL inserter";
- }
- };
- char FPRegKiller::ID = 0;
-}
-
-FunctionPass *llvm::createX87FPRegKillInserterPass() {
- return new FPRegKiller();
-}
-
-/// isFPStackVReg - Return true if the specified vreg is from a fp stack
-/// register class.
-static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
- if (!TargetRegisterInfo::isVirtualRegister(RegNo))
- return false;
-
- switch (MRI.getRegClass(RegNo)->getID()) {
- default: return false;
- case X86::RFP32RegClassID:
- case X86::RFP64RegClassID:
- case X86::RFP80RegClassID:
- return true;
- }
-}
-
-
-/// ContainsFPStackCode - Return true if the specific MBB has floating point
-/// stack code, and thus needs an FP_REG_KILL.
-static bool ContainsFPStackCode(MachineBasicBlock *MBB,
- const MachineRegisterInfo &MRI) {
- // Scan the block, looking for instructions that define or use fp stack vregs.
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (!I->getOperand(op).isReg())
- continue;
- if (unsigned Reg = I->getOperand(op).getReg())
- if (isFPStackVReg(Reg, MRI))
- return true;
- }
- }
-
- // Check PHI nodes in successor blocks. These PHI's will be lowered to have
- // a copy of the input value in this block, which is a definition of the
- // value.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- E = MBB->succ_end(); SI != E; ++ SI) {
- MachineBasicBlock *SuccBB = *SI;
- for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
- I != E; ++I) {
- // All PHI nodes are at the top of the block.
- if (!I->isPHI()) break;
-
- if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
- return true;
- }
- }
-
- return false;
-}
-
-bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
- // If we are emitting FP stack code, scan the basic block to determine if this
- // block defines or uses any FP values. If so, put an FP_REG_KILL instruction
- // before the terminator of the block.
-
- // Note that FP stack instructions are used in all modes for long double,
- // so we always need to do this check.
- // Also note that it's possible for an FP stack register to be live across
- // an instruction that produces multiple basic blocks (SSE CMOV) so we
- // must check all the generated basic blocks.
-
- // Scan all of the machine instructions in these MBBs, checking for FP
- // stores. (RFP32 and RFP64 will not exist in SSE mode, but RFP80 might.)
-
- // Fast-path: If nothing is using the x87 registers, we don't need to do
- // any scanning.
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
- MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
- MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
- return false;
-
- bool Changed = false;
- MachineFunction::iterator MBBI = MF.begin();
- MachineFunction::iterator EndMBB = MF.end();
- for (; MBBI != EndMBB; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
-
- // If this block returns, ignore it. We don't want to insert an FP_REG_KILL
- // before the return.
- if (!MBB->empty()) {
- MachineBasicBlock::iterator EndI = MBB->end();
- --EndI;
- if (EndI->getDesc().isReturn())
- continue;
- }
-
- // If we find any FP stack code, emit the FP_REG_KILL instruction.
- if (ContainsFPStackCode(MBB, MRI)) {
- BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
- MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
- ++NumFPKill;
- Changed = true;
- }
- }
-
- return Changed;
-}
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 72f2bc11d7cc7..c5234413aba63 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -171,6 +171,17 @@ namespace {
virtual void PreprocessISelDAG();
+ inline bool immSext8(SDNode *N) const {
+ return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
+ }
+
+ // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+ // sign extended field.
+ inline bool i64immSExt32(SDNode *N) const {
+ uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
+ return (int64_t)v == (int32_t)v;
+ }
+
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
@@ -1312,13 +1323,6 @@ SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
-static SDNode *FindCallStartFromCall(SDNode *Node) {
- if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- return FindCallStartFromCall(Node->getOperand(0).getNode());
-}
-
SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue Chain = Node->getOperand(0);
SDValue In1 = Node->getOperand(1);
@@ -1403,7 +1407,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC16m;
else if (isSub) {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB16mi8;
else
Opc = X86::LOCK_SUB16mi;
@@ -1411,7 +1415,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB16mr;
} else {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD16mi8;
else
Opc = X86::LOCK_ADD16mi;
@@ -1426,7 +1430,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC32m;
else if (isSub) {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB32mi8;
else
Opc = X86::LOCK_SUB32mi;
@@ -1434,7 +1438,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB32mr;
} else {
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD32mi8;
else
Opc = X86::LOCK_ADD32mi;
@@ -1450,17 +1454,17 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
else if (isSub) {
Opc = X86::LOCK_SUB64mr;
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_SUB64mi8;
- else if (Predicate_i64immSExt32(Val.getNode()))
+ else if (i64immSExt32(Val.getNode()))
Opc = X86::LOCK_SUB64mi32;
}
} else {
Opc = X86::LOCK_ADD64mr;
if (isCN) {
- if (Predicate_immSext8(Val.getNode()))
+ if (immSext8(Val.getNode()))
Opc = X86::LOCK_ADD64mi8;
- else if (Predicate_i64immSExt32(Val.getNode()))
+ else if (i64immSExt32(Val.getNode()))
Opc = X86::LOCK_ADD64mi32;
}
}
@@ -1841,7 +1845,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
// use a smaller encoding.
- if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse())
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ HasNoSignedComparisonUses(Node))
// Look past the truncate if CMP is the only use of it.
N0 = N0.getOperand(0);
if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b3c48862898ff..95dbb61766874 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -16,6 +16,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
+#include "X86ShuffleDecode.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/CallingConv.h"
@@ -343,8 +344,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (Subtarget->hasSSE1())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
- if (!Subtarget->hasSSE2())
- setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+ // We may not have a libcall for MEMBARRIER so we should lower this.
+ setOperationAction(ISD::MEMBARRIER , MVT::Other, Custom);
+
// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
@@ -837,6 +839,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
+ // Can turn SHL into an integer multiply.
+ setOperationAction(ISD::SHL, MVT::v4i32, Custom);
+ setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+
  // i8 and i16 vectors are custom, because the source register and source
  // memory operand types are not the same width. f32 vectors are
// custom since the immediate controlling the insert encodes additional
@@ -866,6 +872,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
@@ -877,7 +884,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
- //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
//setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom);
//setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
//setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
@@ -1189,6 +1196,50 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
+std::pair<const TargetRegisterClass*, uint8_t>
+X86TargetLowering::findRepresentativeClass(EVT VT) const{
+ const TargetRegisterClass *RRC = 0;
+ uint8_t Cost = 1;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return TargetLowering::findRepresentativeClass(VT);
+ case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+ RRC = (Subtarget->is64Bit()
+ ? X86::GR64RegisterClass : X86::GR32RegisterClass);
+ break;
+ case MVT::v8i8: case MVT::v4i16:
+ case MVT::v2i32: case MVT::v1i64:
+ RRC = X86::VR64RegisterClass;
+ break;
+ case MVT::f32: case MVT::f64:
+ case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
+ case MVT::v4f32: case MVT::v2f64:
+ case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
+ case MVT::v4f64:
+ RRC = X86::VR128RegisterClass;
+ break;
+ }
+ return std::make_pair(RRC, Cost);
+}
+
+unsigned
+X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ unsigned FPDiff = RegInfo->hasFP(MF) ? 1 : 0;
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case X86::GR32RegClassID:
+ return 4 - FPDiff;
+ case X86::GR64RegClassID:
+ return 8 - FPDiff;
+ case X86::VR128RegClassID:
+ return Subtarget->is64Bit() ? 10 : 4;
+ case X86::VR64RegClassID:
+ return 4;
+ }
+}
+
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
if (!Subtarget->isTargetLinux())
@@ -1259,6 +1310,19 @@ X86TargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue ValToCopy = OutVals[i];
+ EVT ValVT = ValToCopy.getValueType();
+
+ // If this is x86-64, and we disabled SSE, we can't return FP values
+ if ((ValVT == MVT::f32 || ValVT == MVT::f64) &&
+ (Subtarget->is64Bit() && !Subtarget->hasSSE1())) {
+ report_fatal_error("SSE register return with SSE disabled");
+ }
+ // Likewise we can't return F64 values with SSE1 only. gcc does so, but
+ // llvm-gcc has never done it right and no one has noticed, so this
+ // should be OK for now.
+ if (ValVT == MVT::f64 &&
+ (Subtarget->is64Bit() && !Subtarget->hasSSE2()))
+ report_fatal_error("SSE2 register return with SSE2 disabled");
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
@@ -1276,14 +1340,20 @@ X86TargetLowering::LowerReturn(SDValue Chain,
// 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
// which is returned in RAX / RDX.
if (Subtarget->is64Bit()) {
- EVT ValVT = ValToCopy.getValueType();
if (ValVT.isVector() && ValVT.getSizeInBits() == 64) {
ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy);
- if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1)
- ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy);
+ if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
+ ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
+ ValToCopy);
+
+ // If we don't have SSE2 available, convert to v4f32 so the generated
+ // register is legal.
+ if (!Subtarget->hasSSE2())
+ ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,ValToCopy);
+ }
}
}
-
+
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
Flag = Chain.getValue(1);
}
@@ -1570,6 +1640,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
RC = X86::FR32RegisterClass;
else if (RegVT == MVT::f64)
RC = X86::FR64RegisterClass;
+ else if (RegVT.isVector() && RegVT.getSizeInBits() == 256)
+ RC = X86::VR256RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 128)
RC = X86::VR128RegisterClass;
else if (RegVT.isVector() && RegVT.getSizeInBits() == 64)
@@ -1937,6 +2009,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (isVarArg && Subtarget->isTargetWin64()) {
+ // Win64 ABI requires argument XMM reg to be copied to the corresponding
+ // shadow reg if callee is a varargs function.
+ unsigned ShadowReg = 0;
+ switch (VA.getLocReg()) {
+ case X86::XMM0: ShadowReg = X86::RCX; break;
+ case X86::XMM1: ShadowReg = X86::RDX; break;
+ case X86::XMM2: ShadowReg = X86::R8; break;
+ case X86::XMM3: ShadowReg = X86::R9; break;
+ }
+ if (ShadowReg)
+ RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+ }
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
if (StackPtr.getNode() == 0)
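
A concrete illustration of the Win64 rule handled above (an assumed example, not from the patch): in a call like printf("%f\n", x), the double occupies argument slot 1, so it travels in XMM1 and is mirrored into RDX, the integer register for the same slot; the variadic callee can then fetch it through its va_arg machinery without knowing the argument's type.

extern "C" int printf(const char *, ...);

int demo(double x) {
  // Conceptually, the lowering above produces on Win64:
  //   RCX  <- pointer to "%f\n"   (slot 0, integer class)
  //   XMM1 <- x                   (slot 1, FP class)
  //   RDX  <- bit pattern of x    (shadow copy for the variadic callee)
  return printf("%f\n", x);
}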
@@ -1990,7 +2075,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
}
- if (Is64Bit && isVarArg) {
+ if (Is64Bit && isVarArg && !Subtarget->isTargetWin64()) {
// From AMD64 ABI document:
// For calls that may call functions that use varargs or stdargs
// (prototype-less calls or calls to functions containing ellipsis (...) in
@@ -1999,7 +2084,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// the number of registers, but must be an ubound on the number of SSE
// registers used and is in the range 0 - 8 inclusive.
- // FIXME: Verify this on Win64
// Count the number of XMM registers allocated.
static const unsigned XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
@@ -2165,8 +2249,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (!isTailCall && Subtarget->isPICStyleGOT())
Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
- // Add an implicit use of AL for x86 vararg functions.
- if (Is64Bit && isVarArg)
+ // Add an implicit use of AL for non-Windows x86 64-bit vararg functions.
+ if (Is64Bit && isVarArg && !Subtarget->isTargetWin64())
Ops.push_back(DAG.getRegister(X86::AL, MVT::i8));
if (InFlag.getNode())
@@ -2356,8 +2440,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (RegInfo->needsStackRealignment(MF))
return false;
- // Do not sibcall optimize vararg calls unless the call site is not passing any
- // arguments.
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
if (isVarArg && !Outs.empty())
return false;
@@ -2493,6 +2577,112 @@ X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
// Other Lowering Hooks
//===----------------------------------------------------------------------===//
+static bool MayFoldLoad(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
+}
+
+static bool MayFoldIntoStore(SDValue Op) {
+ return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
+}
+
+static bool isTargetShuffle(unsigned Opcode) {
+ switch(Opcode) {
+ default: return false;
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::SHUFPD:
+ case X86ISD::SHUFPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return true;
+ }
+ return false;
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVSHDUP:
+ case X86ISD::MOVSLDUP:
+ return DAG.getNode(Opc, dl, VT, V1);
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ return DAG.getNode(Opc, dl, VT, V1, DAG.getConstant(TargetMask, MVT::i8));
+ }
+
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, unsigned TargetMask, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::SHUFPD:
+ case X86ISD::SHUFPS:
+ return DAG.getNode(Opc, dl, VT, V1, V2,
+ DAG.getConstant(TargetMask, MVT::i8));
+ }
+ return SDValue();
+}
+
+static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
+ SDValue V1, SDValue V2, SelectionDAG &DAG) {
+ switch(Opc) {
+ default: llvm_unreachable("Unknown x86 shuffle node");
+ case X86ISD::MOVLHPS:
+ case X86ISD::MOVLHPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLPS:
+ case X86ISD::MOVLPD:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ return DAG.getNode(Opc, dl, VT, V1, V2);
+ }
+ return SDValue();
+}
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
@@ -3347,18 +3537,27 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
- // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their dest
- // type. This ensures they get CSE'd.
+ // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted
+ // to their dest type. This ensures they get CSE'd.
SDValue Vec;
if (VT.getSizeInBits() == 64) { // MMX
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- } else if (HasSSE2) { // SSE2
- SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
- } else { // SSE1
+ } else if (VT.getSizeInBits() == 128) {
+ if (HasSSE2) { // SSE2
+ SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ } else { // SSE1
+ SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ }
+ } else if (VT.getSizeInBits() == 256) { // AVX
+ // 256-bit logic and arithmetic instructions in AVX are
+ // all floating-point; there is no support for integer ops,
+ // so default to emitting fp zeroed vectors.
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops, 8);
}
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
@@ -3372,9 +3571,9 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// type. This ensures they get CSE'd.
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
SDValue Vec;
- if (VT.getSizeInBits() == 64) // MMX
+ if (VT.getSizeInBits() == 64) // MMX
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
- else // SSE
+ else // SSE
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}
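
The v8f32 choice above matters because AVX, before AVX2, has no 256-bit integer logic instructions: a 256-bit integer zero is therefore materialized as a floating-point zero (typically a single vxorps %ymm0, %ymm0, %ymm0) and bitcast back to the integer type, which is exactly what a BUILD_VECTOR of eight f32 zeros lowers to.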
@@ -3439,9 +3638,8 @@ static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}
-/// PromoteSplat - Promote a splat of v4f32, v8i16 or v16i8 to v4i32.
-static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
- bool HasSSE2) {
+/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
+static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG) {
if (SV->getValueType(0).getVectorNumElements() <= 4)
return SDValue(SV, 0);
@@ -3488,68 +3686,253 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
}
-/// getNumOfConsecutiveZeros - Return the number of elements in a result of
-/// a shuffle that is zero.
-static
-unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
- bool Low, SelectionDAG &DAG) {
- unsigned NumZeros = 0;
- for (int i = 0; i < NumElems; ++i) {
- unsigned Index = Low ? i : NumElems-i-1;
- int Idx = SVOp->getMaskElt(Index);
- if (Idx < 0) {
- ++NumZeros;
- continue;
- }
- SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
- if (Elt.getNode() && X86::isZeroNode(Elt))
- ++NumZeros;
- else
+/// getShuffleScalarElt - Returns the scalar element that will make up the ith
+/// element of the result of the vector shuffle.
+SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+ unsigned Depth) {
+ if (Depth == 6)
+ return SDValue(); // Limit search depth.
+
+ SDValue V = SDValue(N, 0);
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+
+ // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
+ if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
+ Index = SV->getMaskElt(Index);
+
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ int NumElems = VT.getVectorNumElements();
+ SDValue NewV = (Index < NumElems) ? SV->getOperand(0) : SV->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG, Depth+1);
+ }
+
+ // Recurse into target specific vector shuffles to find scalars.
+ if (isTargetShuffle(Opcode)) {
+ int NumElems = VT.getVectorNumElements();
+ SmallVector<unsigned, 16> ShuffleMask;
+ SDValue ImmN;
+
+ switch(Opcode) {
+ case X86ISD::SHUFPS:
+ case X86ISD::SHUFPD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeSHUFPSMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ DecodePUNPCKHMask(NumElems, ShuffleMask);
break;
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ DecodeUNPCKHPMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ DecodePUNPCKLMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ DecodeUNPCKLPMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::MOVHLPS:
+ DecodeMOVHLPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::MOVLHPS:
+ DecodeMOVLHPSMask(NumElems, ShuffleMask);
+ break;
+ case X86ISD::PSHUFD:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFMask(NumElems,
+ cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFHW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFHWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::PSHUFLW:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodePSHUFLWMask(cast<ConstantSDNode>(ImmN)->getZExtValue(),
+ ShuffleMask);
+ break;
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD: {
+      // Index 0 always comes from the first element of the second source;
+      // that is why MOVSS and MOVSD are used in the first place. The other
+      // elements come from the corresponding positions of the first source
+      // vector.
+ unsigned OpNum = (Index == 0) ? 1 : 0;
+ return getShuffleScalarElt(V.getOperand(OpNum).getNode(), Index, DAG,
+ Depth+1);
+ }
+ default:
+      assert(0 && "not implemented for target shuffle node");
+ return SDValue();
+ }
+
+ Index = ShuffleMask[Index];
+ if (Index < 0)
+ return DAG.getUNDEF(VT.getVectorElementType());
+
+ SDValue NewV = (Index < NumElems) ? N->getOperand(0) : N->getOperand(1);
+ return getShuffleScalarElt(NewV.getNode(), Index % NumElems, DAG,
+ Depth+1);
}
- return NumZeros;
-}
-/// isVectorShift - Returns true if the shuffle can be implemented as a
-/// logical left or right shift of a vector.
-/// FIXME: split into pslldqi, psrldqi, palignr variants.
-static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
- bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
- unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ // Actual nodes that may contain scalar elements
+ if (Opcode == ISD::BIT_CONVERT) {
+ V = V.getOperand(0);
+ EVT SrcVT = V.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
- isLeft = true;
- unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
- if (!NumZeros) {
- isLeft = false;
- NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG);
- if (!NumZeros)
- return false;
+ if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
+ return SDValue();
+ }
+
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Index == 0) ? V.getOperand(0)
+ : DAG.getUNDEF(VT.getVectorElementType());
+
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Index);
+
+ return SDValue();
+}
+
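Stripped of the SDNode plumbing, the recursion above is just index chasing through a stack of masks. A minimal standalone sketch (plain int masks, a single source operand, and -1 for undef are simplifying assumptions, not the LLVM API):

    #include <vector>

    // Chase a result index through nested shuffle masks, outermost first.
    // Returns the index into the innermost source, or -1 once it hits undef.
    int chaseShuffleIndex(const std::vector<std::vector<int>> &Masks,
                          int Index, int NumElems) {
      for (const std::vector<int> &M : Masks) {
        Index = M[Index];
        if (Index < 0)
          return -1;          // undef propagates outward
        Index %= NumElems;    // both operands assumed to be the same vector
      }
      return Index;
    }
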
+/// getNumOfConsecutiveZeros - Return the number of elements of a vector
+/// shuffle operation which consecutively come from a zero. The search can
+/// start in two different directions, from left or right.
+static
+unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
+ bool ZerosFromLeft, SelectionDAG &DAG) {
+ int i = 0;
+
+ while (i < NumElems) {
+ unsigned Index = ZerosFromLeft ? i : NumElems-i-1;
+ SDValue Elt = getShuffleScalarElt(N, Index, DAG, 0);
+ if (!(Elt.getNode() &&
+ (Elt.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elt))))
+ break;
+ ++i;
}
+
+ return i;
+}
+
+/// isShuffleMaskConsecutive - Check if the shuffle mask indices from MaskI to
+/// MaskE correspond consecutively to elements from one of the vector operands,
+/// starting from its index OpIdx. Also set OpNum to the source vector operand
+/// they come from.
+static
+bool isShuffleMaskConsecutive(ShuffleVectorSDNode *SVOp, int MaskI, int MaskE,
+ int OpIdx, int NumElems, unsigned &OpNum) {
bool SeenV1 = false;
bool SeenV2 = false;
- for (unsigned i = NumZeros; i < NumElems; ++i) {
- unsigned Val = isLeft ? (i - NumZeros) : i;
- int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
- if (Idx_ < 0)
+
+ for (int i = MaskI; i <= MaskE; ++i, ++OpIdx) {
+ int Idx = SVOp->getMaskElt(i);
+    // Ignore undef indices
+ if (Idx < 0)
continue;
- unsigned Idx = (unsigned) Idx_;
+
if (Idx < NumElems)
SeenV1 = true;
- else {
- Idx -= NumElems;
+ else
SeenV2 = true;
- }
- if (Idx != Val)
+
+ // Only accept consecutive elements from the same vector
+ if ((Idx % NumElems != OpIdx) || (SeenV1 && SeenV2))
return false;
}
- if (SeenV1 && SeenV2)
+
+ OpNum = SeenV1 ? 0 : 1;
+ return true;
+}
+
+/// isVectorShiftRight - Returns true if the shuffle can be implemented as a
+/// logical right shift of a vector.
+static bool isVectorShiftRight(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ false /* check zeros from right */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // V1 = {X, A, B, C} 0
+ // \ \ \ /
+ // vector_shuffle V1, V2 <1, 2, 3, X>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ 0, // Mask Start Index
+ NumElems-NumZeros-1, // Mask End Index
+ NumZeros, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
+ return false;
+
+ isLeft = false;
+ ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
+ return true;
+}
+
+/// isVectorShiftLeft - Returns true if the shuffle can be implemented as a
+/// logical left shift of a vector.
+static bool isVectorShiftLeft(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();
+ unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems,
+ true /* check zeros from left */, DAG);
+ unsigned OpSrc;
+
+ if (!NumZeros)
+ return false;
+
+ // Considering the elements in the mask that are not consecutive zeros,
+ // check if they consecutively come from only one of the source vectors.
+ //
+ // 0 { A, B, X, X } = V2
+ // / \ / /
+ // vector_shuffle V1, V2 <X, X, 4, 5>
+ //
+ if (!isShuffleMaskConsecutive(SVOp,
+ NumZeros, // Mask Start Index
+ NumElems-1, // Mask End Index
+ 0, // Where to start looking in the src vector
+ NumElems, // Number of elements in vector
+ OpSrc)) // Which source operand ?
return false;
- ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1);
+ isLeft = true;
ShAmt = NumZeros;
+ ShVal = SVOp->getOperand(OpSrc);
return true;
}
+/// isVectorShift - Returns true if the shuffle can be implemented as a
+/// logical left or right shift of a vector.
+static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
+ bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
+ if (isVectorShiftLeft(SVOp, DAG, isLeft, ShVal, ShAmt) ||
+ isVectorShiftRight(SVOp, DAG, isLeft, ShVal, ShAmt))
+ return true;
+
+ return false;
+}
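As a worked check of the two helpers above, here is the same logic re-implemented over a plain int mask (a sketch: -1 encodes undef standing in for zeroable lanes, and indices >= NumElems refer to the second operand):

    #include <cstdio>

    static int zerosFromLeft(const int *M, int N) {
      int i = 0;
      while (i < N && M[i] < 0) ++i;   // undef lanes stand in for zeros here
      return i;
    }

    static bool consecutiveFrom(const int *M, int B, int E, int OpIdx, int N,
                                int &OpNum) {
      bool SeenV1 = false, SeenV2 = false;
      for (int i = B; i <= E; ++i, ++OpIdx) {
        if (M[i] < 0) continue;                      // ignore undef indices
        if (M[i] < N) SeenV1 = true; else SeenV2 = true;
        if (M[i] % N != OpIdx || (SeenV1 && SeenV2))
          return false;                              // non-consecutive or mixed
      }
      OpNum = SeenV1 ? 0 : 1;
      return true;
    }

    int main() {
      int Mask[4] = {-1, -1, 4, 5};   // the <X, X, 4, 5> diagram above
      int NZ = zerosFromLeft(Mask, 4), OpNum;
      if (NZ && consecutiveFrom(Mask, NZ, 3, 0, 4, OpNum))
        printf("left shift of V%d by %d elements\n", OpNum + 1, NZ); // V2 by 2
    }
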
/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
@@ -3779,9 +4162,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- // All zero's are handled with pxor, all one's are handled with pcmpeqd.
- if (ISD::isBuildVectorAllZeros(Op.getNode())
- || ISD::isBuildVectorAllOnes(Op.getNode())) {
+  // All zeros are handled with pxor in SSE2 and above, xorps in SSE1.
+  // All ones are handled with pcmpeqd. In AVX, zeros are handled with
+  // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256-bit version of
+  // pcmpeqd is present, so AllOnes is ignored.
+ if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+ (Op.getValueType().getSizeInBits() != 256 &&
+ ISD::isBuildVectorAllOnes(Op.getNode()))) {
// Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
@@ -3819,10 +4206,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (NumNonZero == 0) {
- // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ // All undef vector. Return an UNDEF. All zero vectors were handled above.
+ if (NumNonZero == 0)
return DAG.getUNDEF(VT);
- }
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
@@ -3960,7 +4346,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (EVTBits == 16 && NumElems == 8) {
SDValue V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
- *this);
+ *this);
if (V.getNode()) return V;
}
@@ -4014,28 +4400,51 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (LD.getNode())
return LD;
- // For SSE 4.1, use inserts into undef.
+ // For SSE 4.1, use insertps to put the high elements into the low element.
if (getSubtarget()->hasSSE41()) {
- V[0] = DAG.getUNDEF(VT);
- for (unsigned i = 0; i < NumElems; ++i)
- if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
- V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
+ SDValue Result;
+ if (Op.getOperand(0).getOpcode() != ISD::UNDEF)
+ Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
+ else
+ Result = DAG.getUNDEF(VT);
+
+ for (unsigned i = 1; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
Op.getOperand(i), DAG.getIntPtrConstant(i));
- return V[0];
+ }
+ return Result;
}
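At the source level the SSE4.1 path corresponds to one movss followed by a chain of insertps, roughly as below (an intrinsics sketch; note _mm_set_ss zeroes the upper lanes where the lowering leaves them undef):

    #include <smmintrin.h>  // SSE4.1

    // Build <a, b, c, d> the way the code above does: scalar_to_vector for
    // element 0, then one insertps per remaining element. The insertps
    // immediate carries the destination lane in bits [5:4].
    __m128 build4(float a, float b, float c, float d) {
      __m128 R = _mm_set_ss(a);                   // element 0
      R = _mm_insert_ps(R, _mm_set_ss(b), 0x10);  // b into lane 1
      R = _mm_insert_ps(R, _mm_set_ss(c), 0x20);  // c into lane 2
      R = _mm_insert_ps(R, _mm_set_ss(d), 0x30);  // d into lane 3
      return R;
    }
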
- // Otherwise, expand into a number of unpckl*
- // e.g. for v4f32
+  // Otherwise, expand into a number of unpckl*; start by extending each of
+  // our (non-undef) elements to the full vector width, with the element in
+  // the bottom slot of the vector (which generates no code for SSE).
+ for (unsigned i = 0; i < NumElems; ++i) {
+ if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
+ V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
+ else
+ V[i] = DAG.getUNDEF(VT);
+ }
+
+ // Next, we iteratively mix elements, e.g. for v4f32:
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
// Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
- for (unsigned i = 0; i < NumElems; ++i)
- V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
- NumElems >>= 1;
- while (NumElems != 0) {
- for (unsigned i = 0; i < NumElems; ++i)
- V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
- NumElems >>= 1;
+ unsigned EltStride = NumElems >> 1;
+ while (EltStride != 0) {
+ for (unsigned i = 0; i < EltStride; ++i) {
+      // If V[i+EltStride] is undef and this is the first round of mixing,
+      // then it is safe to just drop this shuffle: V[i] is already in the
+      // right place, and the one element being inserted (undef, since this
+      // is the first round) can simply be dropped. This isn't safe for
+      // successive rounds because they will permute elements within both
+      // vectors.
+ if (V[i+EltStride].getOpcode() == ISD::UNDEF &&
+ EltStride == NumElems/2)
+ continue;
+
+ V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + EltStride]);
+ }
+ EltStride >>= 1;
}
return V[0];
}
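For v4f32 the mixing loop realizes exactly the Step 1/Step 2 tree from the comment; in intrinsics (a sketch, element comments written high-to-low like the comment above):

    #include <xmmintrin.h>  // SSE1

    __m128 build4_unpack(float e0, float e1, float e2, float e3) {
      __m128 V0 = _mm_set_ss(e0), V1 = _mm_set_ss(e1);
      __m128 V2 = _mm_set_ss(e2), V3 = _mm_set_ss(e3);
      __m128 X = _mm_unpacklo_ps(V0, V2);  // X: <?, ?, e2, e0>
      __m128 Y = _mm_unpacklo_ps(V1, V3);  // Y: <?, ?, e3, e1>
      return _mm_unpacklo_ps(X, Y);        //    <e3, e2, e1, e0>
    }
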
@@ -4074,10 +4483,10 @@ X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
// 2. [ssse3] 1 x pshufb
// 3. [ssse3] 2 x pshufb + 1 x por
// 4. [all] mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
-static
-SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
- SelectionDAG &DAG,
- const X86TargetLowering &TLI) {
+SDValue
+X86TargetLowering::LowerVECTOR_SHUFFLEv8i16(SDValue Op,
+ SelectionDAG &DAG) const {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = SVOp->getOperand(0);
SDValue V2 = SVOp->getOperand(1);
DebugLoc dl = SVOp->getDebugLoc();
@@ -4128,7 +4537,7 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// quads, disable the next transformation since it does not help SSSE3.
bool V1Used = InputQuads[0] || InputQuads[1];
bool V2Used = InputQuads[2] || InputQuads[3];
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (Subtarget->hasSSSE3()) {
if (InputQuads.count() == 2 && V1Used && V2Used) {
BestLoQuad = InputQuads.find_first();
BestHiQuad = InputQuads.find_next(BestLoQuad);
@@ -4187,15 +4596,21 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
// If we've eliminated the use of V2, and the new mask is a pshuflw or
// pshufhw, that's as cheap as it gets. Return the new shuffle.
if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
- return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
+ unsigned Opc = pshufhw ? X86ISD::PSHUFHW : X86ISD::PSHUFLW;
+ unsigned TargetMask = 0;
+ NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
+ TargetMask = pshufhw ? X86::getShufflePSHUFHWImmediate(NewV.getNode()):
+ X86::getShufflePSHUFLWImmediate(NewV.getNode());
+ V1 = NewV.getOperand(0);
+ return getTargetShuffleNode(Opc, dl, MVT::v8i16, V1, TargetMask, DAG);
}
}
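The TargetMask passed along here is the usual 8-bit PSHUF immediate: two bits of source-lane index per destination lane. A sketch of the packing (a hypothetical helper, not the X86:: routine used above):

    // Pack four lane selectors (each 0..3) into a PSHUFLW/PSHUFHW immediate.
    // Bits [2*i+1 : 2*i] pick the source lane for destination lane i.
    unsigned encodePSHUFImm(const int Lane[4]) {
      unsigned Imm = 0;
      for (int i = 0; i < 4; ++i)
        Imm |= (unsigned)(Lane[i] & 3) << (2 * i);
      return Imm;  // e.g. {2,0,3,1} -> 1<<6 | 3<<4 | 0<<2 | 2 = 0x72
    }
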
// If we have SSSE3, and all words of the result are from 1 input vector,
// case 2 is generated, otherwise case 3 is generated. If no SSSE3
// is present, fall back to case 4.
- if (TLI.getSubtarget()->hasSSSE3()) {
+ if (Subtarget->hasSSSE3()) {
SmallVector<SDValue,16> pshufbMask;
// If we have elements from both input vectors, set the high bit of the
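Case 2 (one input, SSSE3) becomes a single pshufb whose byte selectors expand each word index w to 2w and 2w+1, with 0x80 producing a zero byte. A sketch of the one-input form (here -1 requests a zeroed word, a simplification of the real mask handling):

    #include <tmmintrin.h>  // SSSE3

    __m128i shuffleWords(__m128i V, const int W[8]) {
      unsigned char Sel[16];
      for (int i = 0; i < 8; ++i) {
        Sel[2*i]   = W[i] < 0 ? 0x80 : (unsigned char)(2*W[i]);
        Sel[2*i+1] = W[i] < 0 ? 0x80 : (unsigned char)(2*W[i]+1);
      }
      return _mm_shuffle_epi8(V, _mm_loadu_si128((const __m128i *)Sel));
    }
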
@@ -4262,6 +4677,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
MaskV.push_back(i);
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFLWImmediate(NewV.getNode()),
+ DAG);
}
// If BestHi >= 0, generate a pshufhw to put the high elements in order,
@@ -4284,6 +4705,12 @@ SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
}
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
+
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3())
+ NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
+ NewV.getOperand(0),
+ X86::getShufflePSHUFHWImmediate(NewV.getNode()),
+ DAG);
}
// In case BestHi & BestLo were both -1, which means each quadword has a word
@@ -4473,7 +4900,7 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
SDValue V2 = SVOp->getOperand(1);
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
- EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
+ EVT MaskVT = (NewWidth == 4) ? MVT::v4i16 : MVT::v2i32;
EVT NewVT = MaskVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
@@ -4697,6 +5124,129 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
}
+static bool MayFoldVectorLoad(SDValue V) {
+ if (V.hasOneUse() && V.getOpcode() == ISD::BIT_CONVERT)
+ V = V.getOperand(0);
+ if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ V = V.getOperand(0);
+  return MayFoldLoad(V);
+}
+
+static
+SDValue getMOVLowToHigh(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG,
+ bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert(VT != MVT::v2i64 && "unsupported shuffle type");
+
+ if (HasSSE2 && VT == MVT::v2f64)
+ return getTargetShuffleNode(X86ISD::MOVLHPD, dl, VT, V1, V2, DAG);
+
+ // v4f32 or v4i32
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVHighToLow(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+
+ assert((VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ "unsupported shuffle type");
+
+ if (V2.getOpcode() == ISD::UNDEF)
+ V2 = V1;
+
+ // v4i32 or v4f32
+ return getTargetShuffleNode(X86ISD::MOVHLPS, dl, VT, V1, V2, DAG);
+}
+
+static
+SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ EVT VT = Op.getValueType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // Use MOVLPS and MOVLPD in case V1 or V2 are loads. During isel, the second
+ // operand of these instructions is only memory, so check if there's a
+  // potential load folding here, otherwise use SHUFPS or MOVSD to match the
+ // same masks.
+ bool CanFoldLoad = false;
+
+ // Trivial case, when V2 comes from a load.
+ if (MayFoldVectorLoad(V2))
+ CanFoldLoad = true;
+
+ // When V1 is a load, it can be folded later into a store in isel, example:
+ // (store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), addr:$src1)
+ // turns into:
+ // (MOVLPSmr addr:$src1, VR128:$src2)
+ // So, recognize this potential and also use MOVLPS or MOVLPD
+ if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op))
+ CanFoldLoad = true;
+
+ if (CanFoldLoad) {
+ if (HasSSE2 && NumElems == 2)
+ return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG);
+
+ if (NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG);
+ }
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+  // movl and movlp will both match v2i64, but v2i64 is never matched by
+  // movl earlier because we keep it strict to avoid messing with the movlp
+  // load folding logic (see the code above the getMOVLP call). Match it here
+  // instead; this is horrible, but will stay like this until we move all
+  // shuffle matching to x86-specific nodes. Note that for the 1st condition
+  // all types are matched with movsd.
+ if ((HasSSE2 && NumElems == 2) || !X86::isMOVLMask(SVOp))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+ else if (HasSSE2)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+
+ assert(VT != MVT::v4i32 && "unsupported shuffle type");
+
+ // Invert the operand order and use SHUFPS to match it.
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V2, V1,
+ X86::getShuffleSHUFImmediate(SVOp), DAG);
+}
+
+static inline unsigned getUNPCKLOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKLDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKLPS;
+ case MVT::v2f64: return X86ISD::UNPCKLPD;
+ case MVT::v16i8: return X86ISD::PUNPCKLBW;
+ case MVT::v8i16: return X86ISD::PUNPCKLWD;
+ default:
+ llvm_unreachable("Unknow type for unpckl");
+ }
+ return 0;
+}
+
+static inline unsigned getUNPCKHOpcode(EVT VT) {
+ switch(VT.getSimpleVT().SimpleTy) {
+ case MVT::v4i32: return X86ISD::PUNPCKHDQ;
+ case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
+ case MVT::v4f32: return X86ISD::UNPCKHPS;
+ case MVT::v2f64: return X86ISD::UNPCKHPD;
+ case MVT::v16i8: return X86ISD::PUNPCKHBW;
+ case MVT::v8i16: return X86ISD::PUNPCKHWD;
+ default:
+ llvm_unreachable("Unknow type for unpckh");
+ }
+ return 0;
+}
+
SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -4710,6 +5260,10 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
bool V1IsSplat = false;
bool V2IsSplat = false;
+ bool HasSSE2 = Subtarget->hasSSE2() || Subtarget->hasAVX();
+ bool HasSSE3 = Subtarget->hasSSE3() || Subtarget->hasAVX();
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
if (isZeroShuffle(SVOp))
return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
@@ -4718,7 +5272,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (SVOp->isSplat()) {
if (isMMX || NumElems < 4)
return Op;
- return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
+ return PromoteSplat(SVOp, DAG);
}
// If the shuffle can be profitably rewritten as a narrower shuffle, then
@@ -4746,8 +5300,35 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
}
- if (X86::isPSHUFDMask(SVOp))
- return Op;
+ // NOTE: isPSHUFDMask can also match both masks below (unpckl_undef and
+ // unpckh_undef). Only use pshufd if speed is more important than size.
+ if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
+ if (X86::isPSHUFDMask(SVOp)) {
+    // The actual implementation will match the mask in the if above and then
+    // during isel it can match several different instructions, not only pshufd
+    // as its name suggests. Sad but true; emulate that behavior for now...
+ if (X86::isMOVDDUPMask(SVOp) && ((VT == MVT::v4f32 || VT == MVT::v2i64)))
+ return getTargetShuffleNode(X86ISD::MOVLHPS, dl, VT, V1, V1, DAG);
+
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+
+ if (HasSSE2 && (VT == MVT::v4f32 || VT == MVT::v4i32))
+ return getTargetShuffleNode(X86ISD::PSHUFD, dl, VT, V1, TargetMask, DAG);
+
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V1,
+ TargetMask, DAG);
+
+ if (VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V1,
+ TargetMask, DAG);
+ }
// Check if this can be converted into a logical shift.
bool isLeft = false;
@@ -4768,17 +5349,32 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return V2;
if (ISD::isBuildVectorAllZeros(V1.getNode()))
return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
- if (!isMMX)
- return Op;
+ if (!isMMX && !X86::isMOVLPMask(SVOp)) {
+ if (HasSSE2 && (VT == MVT::v2i64 || VT == MVT::v2f64))
+ return getTargetShuffleNode(X86ISD::MOVSD, dl, VT, V1, V2, DAG);
+
+ if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return getTargetShuffleNode(X86ISD::MOVSS, dl, VT, V1, V2, DAG);
+ }
}
// FIXME: fold these into legal mask.
- if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
- X86::isMOVSLDUPMask(SVOp) ||
- X86::isMOVHLPSMask(SVOp) ||
- X86::isMOVLHPSMask(SVOp) ||
- X86::isMOVLPMask(SVOp)))
- return Op;
+ if (!isMMX) {
+ if (X86::isMOVLHPSMask(SVOp) && !X86::isUNPCKLMask(SVOp))
+ return getMOVLowToHigh(Op, dl, DAG, HasSSE2);
+
+ if (X86::isMOVHLPSMask(SVOp))
+ return getMOVHighToLow(Op, dl, DAG);
+
+ if (X86::isMOVSHDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSHDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVSLDUPMask(SVOp) && HasSSE3 && V2IsUndef && NumElems == 4)
+ return getTargetShuffleNode(X86ISD::MOVSLDUP, dl, VT, V1, DAG);
+
+ if (X86::isMOVLPMask(SVOp))
+ return getMOVLP(Op, dl, DAG, HasSSE2);
+ }
if (ShouldXformToMOVHLPS(SVOp) ||
ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
@@ -4818,11 +5414,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return getMOVL(DAG, dl, VT, V2, V1);
}
- if (X86::isUNPCKL_v_undef_Mask(SVOp) ||
- X86::isUNPCKH_v_undef_Mask(SVOp) ||
- X86::isUNPCKLMask(SVOp) ||
- X86::isUNPCKHMask(SVOp))
- return Op;
+ if (X86::isUNPCKLMask(SVOp))
+ return (isMMX) ?
+ Op : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+
+ if (X86::isUNPCKHMask(SVOp))
+ return (isMMX) ?
+ Op : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
if (V2IsSplat) {
// Normalize mask so all entries that point to V2 points to its first
@@ -4844,11 +5442,14 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// FIXME: this seems wrong.
SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
- if (X86::isUNPCKL_v_undef_Mask(NewSVOp) ||
- X86::isUNPCKH_v_undef_Mask(NewSVOp) ||
- X86::isUNPCKLMask(NewSVOp) ||
- X86::isUNPCKHMask(NewSVOp))
- return NewOp;
+
+ if (X86::isUNPCKLMask(NewSVOp))
+ return (isMMX) ?
+ NewOp : getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+
+ if (X86::isUNPCKHMask(NewSVOp))
+ return (isMMX) ?
+ NewOp : getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
}
// FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.
@@ -4857,15 +5458,52 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
return CommuteVectorShuffle(SVOp, DAG);
- // Check for legal shuffle and return?
- SmallVector<int, 16> PermMask;
- SVOp->getMask(PermMask);
- if (isShuffleMaskLegal(PermMask, VT))
+  // The checks below are all present in isShuffleMaskLegal, but they are
+  // inlined here right now to enable us to directly emit target-specific
+  // nodes, and to remove them one by one until they no longer return Op.
+ SmallVector<int, 16> M;
+ SVOp->getMask(M);
+
+ // Very little shuffling can be done for 64-bit vectors right now.
+ if (VT.getSizeInBits() == 64)
+ return isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ? Op : SDValue();
+
+ // FIXME: pshufb, blends, shifts.
+ if (VT.getVectorNumElements() == 2 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ isPALIGNRMask(M, VT, Subtarget->hasSSSE3()))
return Op;
+ if (isPSHUFHWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFHW, dl, VT, V1,
+ X86::getShufflePSHUFHWImmediate(SVOp),
+ DAG);
+
+ if (isPSHUFLWMask(M, VT))
+ return getTargetShuffleNode(X86ISD::PSHUFLW, dl, VT, V1,
+ X86::getShufflePSHUFLWImmediate(SVOp),
+ DAG);
+
+ if (isSHUFPMask(M, VT)) {
+ unsigned TargetMask = X86::getShuffleSHUFImmediate(SVOp);
+ if (VT == MVT::v4f32 || VT == MVT::v4i32)
+ return getTargetShuffleNode(X86ISD::SHUFPS, dl, VT, V1, V2,
+ TargetMask, DAG);
+ if (VT == MVT::v2f64 || VT == MVT::v2i64)
+ return getTargetShuffleNode(X86ISD::SHUFPD, dl, VT, V1, V2,
+ TargetMask, DAG);
+ }
+
+ if (X86::isUNPCKL_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ if (X86::isUNPCKH_v_undef_Mask(SVOp))
+ if (VT != MVT::v2i64 && VT != MVT::v2f64)
+ return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
+
// Handle v8i16 specifically since SSE can do byte extraction and insertion.
if (VT == MVT::v8i16) {
- SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
+ SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(Op, DAG);
if (NewOp.getNode())
return NewOp;
}
@@ -6922,24 +7560,58 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(X86CC, MVT::i8), Cond);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
- // ptest intrinsics. The intrinsic these come from are designed to return
- // an integer value, not just an instruction so lower it to the ptest
- // pattern and a setcc for the result.
+  // ptest and testp intrinsics. The intrinsics these come from are designed to
+  // return an integer value, not just an instruction, so lower them to the
+  // ptest or testp pattern and a setcc for the result.
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_sse41_ptestc:
- case Intrinsic::x86_sse41_ptestnzc:{
+ case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestz_256:
+ case Intrinsic::x86_avx_ptestc_256:
+ case Intrinsic::x86_avx_ptestnzc_256:
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256: {
+ bool IsTestPacked = false;
unsigned X86CC = 0;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
+ case Intrinsic::x86_avx_vtestz_ps:
+ case Intrinsic::x86_avx_vtestz_pd:
+ case Intrinsic::x86_avx_vtestz_ps_256:
+ case Intrinsic::x86_avx_vtestz_pd_256:
+ IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestz:
+ case Intrinsic::x86_avx_ptestz_256:
// ZF = 1
X86CC = X86::COND_E;
break;
+ case Intrinsic::x86_avx_vtestc_ps:
+ case Intrinsic::x86_avx_vtestc_pd:
+ case Intrinsic::x86_avx_vtestc_ps_256:
+ case Intrinsic::x86_avx_vtestc_pd_256:
+ IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestc:
+ case Intrinsic::x86_avx_ptestc_256:
// CF = 1
X86CC = X86::COND_B;
break;
+ case Intrinsic::x86_avx_vtestnzc_ps:
+ case Intrinsic::x86_avx_vtestnzc_pd:
+ case Intrinsic::x86_avx_vtestnzc_ps_256:
+ case Intrinsic::x86_avx_vtestnzc_pd_256:
+ IsTestPacked = true; // Fallthrough
case Intrinsic::x86_sse41_ptestnzc:
+ case Intrinsic::x86_avx_ptestnzc_256:
// ZF and CF = 0
X86CC = X86::COND_A;
break;
@@ -6947,7 +7619,8 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
+ unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
+ SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
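For reference, the flag semantics being keyed on here, as a scalar model of PTEST (a sketch; the packed vtestps/vtestpd forms apply the same idea to sign bits only): ZF covers the AND of the operands, CF the ANDN, and the *nzc variants require both clear.

    #include <cstdint>

    struct TestFlags { bool ZF, CF; };

    // Scalar model of PTEST flag computation over a 128-bit value.
    TestFlags ptest(const uint64_t A[2], const uint64_t B[2]) {
      TestFlags F;
      F.ZF = ((A[0] & B[0]) | (A[1] & B[1])) == 0;    // ptestz  -> COND_E
      F.CF = ((~A[0] & B[0]) | (~A[1] & B[1])) == 0;  // ptestc  -> COND_B
      return F;                            // ptestnzc: !ZF && !CF -> COND_A
    }
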
@@ -7110,12 +7783,13 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
SDValue Handler = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP,
- getPointerTy());
+ SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ Subtarget->is64Bit() ? X86::RBP : X86::EBP,
+ getPointerTy());
unsigned StoreAddrReg = (Subtarget->is64Bit() ? X86::RCX : X86::ECX);
- SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame,
- DAG.getIntPtrConstant(-TD->getPointerSize()));
+ SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Frame,
+ DAG.getIntPtrConstant(TD->getPointerSize()));
StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
@@ -7218,7 +7892,8 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;
if (InRegCount > 2) {
- report_fatal_error("Nest register in use - reduce number of inreg parameters!");
+ report_fatal_error("Nest register in use - reduce number of inreg"
+ " parameters!");
}
}
break;
@@ -7439,6 +8114,86 @@ SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
+SDValue X86TargetLowering::LowerSHL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue R = Op.getOperand(0);
+
+ LLVMContext *Context = DAG.getContext();
+
+ assert(Subtarget->hasSSE41() && "Cannot lower SHL without SSE4.1 or later");
+
+ if (VT == MVT::v4i32) {
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(23, MVT::i32));
+
+ ConstantInt *CI = ConstantInt::get(*Context, APInt(32, 0x3f800000U));
+
+ std::vector<Constant*> CV(4, CI);
+ Constant *C = ConstantVector::get(CV);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
+
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend);
+ Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, Op);
+ Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op);
+ return DAG.getNode(ISD::MUL, dl, VT, Op, R);
+ }
+ if (VT == MVT::v16i8) {
+ // a = a << 5;
+ Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
+ Op.getOperand(1), DAG.getConstant(5, MVT::i32));
+
+ ConstantInt *CM1 = ConstantInt::get(*Context, APInt(8, 15));
+ ConstantInt *CM2 = ConstantInt::get(*Context, APInt(8, 63));
+
+ std::vector<Constant*> CVM1(16, CM1);
+ std::vector<Constant*> CVM2(16, CM2);
+ Constant *C = ConstantVector::get(CVM1);
+ SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0,
+ false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)15, 4), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(4, MVT::i32));
+ R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ C = ConstantVector::get(CVM2);
+ CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
+ M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+ PseudoSourceValue::getConstantPool(), 0, false, false, 16);
+
+ // r = pblendv(r, psllw(r & (char16)63, 2), a);
+ M = DAG.getNode(ISD::AND, dl, VT, R, M);
+ M = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), M,
+ DAG.getConstant(2, MVT::i32));
+ R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R, M, Op);
+ // a += a
+ Op = DAG.getNode(ISD::ADD, dl, VT, Op, Op);
+
+ // return pblendv(r, r+r, a);
+ R = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::x86_sse41_pblendvb, MVT::i32),
+ R, DAG.getNode(ISD::ADD, dl, VT, R, R), Op);
+ return R;
+ }
+ return SDValue();
+}
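The v4i32 branch relies on a float trick: placing the shift amount in the exponent field of 1.0f builds 2^s per lane, which FP_TO_SINT turns back into the integer power of two to multiply by. The scalar equivalent (a sketch; exact for s <= 30, and on x86 cvttps2dq happens to return 0x80000000 == 1u << 31 for s == 31 as well):

    #include <cstdint>
    #include <cstring>

    // Compute 1u << S (S in 0..30) by building the float 2^S, as the v4i32
    // lowering above does for each lane.
    uint32_t shiftViaExponent(uint32_t S) {
      uint32_t Bits = (S << 23) + 0x3f800000u;  // exponent of 1.0f plus S
      float F;
      std::memcpy(&F, &Bits, sizeof F);         // the BIT_CONVERT step
      return (uint32_t)(int32_t)F;              // the FP_TO_SINT step
    }
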
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
@@ -7508,6 +8263,50 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return Sum;
}
+SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (!Subtarget->hasSSE2()) {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Zero = DAG.getConstant(0,
+ Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ SDValue Ops[] = {
+ DAG.getRegister(X86::ESP, MVT::i32), // Base
+ DAG.getTargetConstant(1, MVT::i8), // Scale
+ DAG.getRegister(0, MVT::i32), // Index
+ DAG.getTargetConstant(0, MVT::i32), // Disp
+ DAG.getRegister(0, MVT::i32), // Segment.
+ Zero,
+ Chain
+ };
+ SDNode *Res =
+ DAG.getMachineNode(X86::OR32mrLocked, dl, MVT::Other, Ops,
+ array_lengthof(Ops));
+ return SDValue(Res, 0);
+ }
+
+ unsigned isDev = cast<ConstantSDNode>(Op.getOperand(5))->getZExtValue();
+ if (!isDev)
+ return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+
+ unsigned Op1 = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ unsigned Op2 = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Op3 = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ unsigned Op4 = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+ if (!Op1 && !Op2 && !Op3 && Op4)
+ return DAG.getNode(X86ISD::SFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+ if (Op1 && !Op2 && !Op3 && !Op4)
+ return DAG.getNode(X86ISD::LFENCE, dl, MVT::Other, Op.getOperand(0));
+
+ // def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm), (i8 1)),
+ // (MFENCE)>;
+ return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
+}
+
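The device-memory path picks the cheapest fence implied by the four barrier operands; its selection logic, mirrored as a small sketch (operand meanings taken from the membarrier patterns quoted in the comments above):

    enum Fence { SFENCE, LFENCE, MFENCE };

    // Op1..Op4 are the ll/ls/sl/ss membarrier operands from above.
    Fence pickFence(bool Op1, bool Op2, bool Op3, bool Op4) {
      if (!Op1 && !Op2 && !Op3 && Op4)
        return SFENCE;  // store-store barrier only
      if (Op1 && !Op2 && !Op3 && !Op4)
        return LFENCE;  // load-load barrier only
      return MFENCE;    // any other combination
    }
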
SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const {
EVT T = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
@@ -7597,6 +8396,7 @@ SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
@@ -7640,6 +8440,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
+ case ISD::SHL: return LowerSHL(Op, DAG);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -7852,6 +8653,40 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::AND: return "X86ISD::AND";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
+ case X86ISD::TESTP: return "X86ISD::TESTP";
+ case X86ISD::PALIGN: return "X86ISD::PALIGN";
+ case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
+ case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
+ case X86ISD::PSHUFHW_LD: return "X86ISD::PSHUFHW_LD";
+ case X86ISD::PSHUFLW: return "X86ISD::PSHUFLW";
+ case X86ISD::PSHUFLW_LD: return "X86ISD::PSHUFLW_LD";
+ case X86ISD::SHUFPS: return "X86ISD::SHUFPS";
+ case X86ISD::SHUFPD: return "X86ISD::SHUFPD";
+ case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
+ case X86ISD::MOVLHPD: return "X86ISD::MOVLHPD";
+ case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
+ case X86ISD::MOVHLPD: return "X86ISD::MOVHLPD";
+ case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
+ case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
+ case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
+ case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
+ case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
+ case X86ISD::MOVSHDUP_LD: return "X86ISD::MOVSHDUP_LD";
+ case X86ISD::MOVSLDUP_LD: return "X86ISD::MOVSLDUP_LD";
+ case X86ISD::MOVSD: return "X86ISD::MOVSD";
+ case X86ISD::MOVSS: return "X86ISD::MOVSS";
+ case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
+ case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
+ case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
+ case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
+ case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
+ case X86ISD::PUNPCKLWD: return "X86ISD::PUNPCKLWD";
+ case X86ISD::PUNPCKLDQ: return "X86ISD::PUNPCKLDQ";
+ case X86ISD::PUNPCKLQDQ: return "X86ISD::PUNPCKLQDQ";
+ case X86ISD::PUNPCKHBW: return "X86ISD::PUNPCKHBW";
+ case X86ISD::PUNPCKHWD: return "X86ISD::PUNPCKHWD";
+ case X86ISD::PUNPCKHDQ: return "X86ISD::PUNPCKHDQ";
+ case X86ISD::PUNPCKHQDQ: return "X86ISD::PUNPCKHQDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::MINGW_ALLOCA: return "X86ISD::MINGW_ALLOCA";
}
@@ -7863,6 +8698,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
+ Reloc::Model R = getTargetMachine().getRelocationModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
@@ -7882,7 +8718,8 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
return false;
// If lower 4G is not available, then we must use rip-relative addressing.
- if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
+ if ((M != CodeModel::Small || R != Reloc::Static) &&
+ Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
return false;
}
@@ -8368,19 +9205,31 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
}
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
-// all of this code can be replaced with that in the .td file.
+// or XMM0_V32I8 in AVX all of this code can be replaced with that
+// in the .td file.
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
+ assert((Subtarget->hasSSE42() || Subtarget->hasAVX()) &&
+ "Target must have SSE4.2 or AVX features enabled");
+
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
unsigned Opc;
- if (memArg)
- Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
- else
- Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+
+ if (!Subtarget->hasAVX()) {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;
+ } else {
+ if (memArg)
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rm : X86::VPCMPESTRM128rm;
+ else
+ Opc = numArgs == 3 ? X86::VPCMPISTRM128rr : X86::VPCMPESTRM128rr;
+ }
MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));
@@ -8562,7 +9411,8 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
- .addReg(X86::ESP, RegState::Define | RegState::Implicit);
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -8579,6 +9429,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
= static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
DebugLoc DL = MI->getDebugLoc();
MachineFunction *F = BB->getParent();
+ bool IsWin64 = Subtarget->isTargetWin64();
assert(MI->getOperand(3).isGlobal() && "This should be a global");
@@ -8590,7 +9441,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
.addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
MI->getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ MIB = BuildMI(*BB, MI, DL, TII->get(IsWin64 ? X86::WINCALL64m : X86::CALL64m));
addDirectMem(MIB, X86::RDI);
} else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
@@ -8727,12 +9578,16 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
}
// String/text processing lowering.
case X86::PCMPISTRM128REG:
+ case X86::VPCMPISTRM128REG:
return EmitPCMP(MI, BB, 3, false /* in-mem */);
case X86::PCMPISTRM128MEM:
+ case X86::VPCMPISTRM128MEM:
return EmitPCMP(MI, BB, 3, true /* in-mem */);
case X86::PCMPESTRM128REG:
+ case X86::VPCMPESTRM128REG:
return EmitPCMP(MI, BB, 5, false /* in mem */);
case X86::PCMPESTRM128MEM:
+ case X86::VPCMPESTRM128MEM:
return EmitPCMP(MI, BB, 5, true /* in mem */);
// Atomic Lowering.
@@ -8966,21 +9821,20 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
if (VT.getSizeInBits() != 128)
return SDValue();
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
- Elts.push_back(DAG.getShuffleScalarElt(SVN, i));
-
+ Elts.push_back(getShuffleScalarElt(N, i, DAG, 0));
+
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
-/// PerformShuffleCombine - Detect vector gather/scatter index generation
-/// and convert it from being a bunch of shuffles and extracts to a simple
-/// store and scalar loads to extract the elements.
+/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
+/// generation and convert it from being a bunch of shuffles and extracts
+/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
SDValue InputVector = N->getOperand(0);
@@ -9030,8 +9884,8 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
// Store the value to a temporary stack slot.
SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr, NULL,
+ 0, false, false, 0);
// Replace each use (extract) with a load of the appropriate element.
for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
@@ -9045,11 +9899,12 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());
- SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), OffsetVal, StackPtr);
+ SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
+ OffsetVal, StackPtr);
// Load the scalar.
- SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr,
- NULL, 0, false, false, 0);
+ SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
+ ScalarAddr, NULL, 0, false, false, 0);
    // Replace the extract with the load.
DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
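The net effect of the combine: one spill of the whole vector replaces the shuffle/extract chains, and every use becomes a cheap scalar load. In source form (a sketch):

    #include <xmmintrin.h>

    // Conceptually, N extractelement-style lane reads of V become:
    float extractLane(__m128 V, unsigned I) {
      alignas(16) float Tmp[4];
      _mm_store_ps(Tmp, V);  // one store to a stack temporary
      return Tmp[I];         // each extract is now a load at EltSize * I
    }
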
@@ -9087,8 +9942,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
@@ -9126,8 +9980,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle NaNs incorrectly, and swapping
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
@@ -9156,8 +10009,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
@@ -9182,8 +10034,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
case ISD::SETULT:
// Converting this to a max would handle NaNs incorrectly.
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
Opcode = X86ISD::FMAX;
break;
@@ -9193,8 +10044,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// cause it to handle NaNs incorrectly.
if (!UnsafeFPMath &&
!DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
- if (!FiniteOnlyFPMath() &&
- (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
break;
std::swap(LHS, RHS);
}
@@ -9905,7 +10755,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
- case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
case ISD::EXTRACT_VECTOR_ELT:
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
@@ -9922,6 +10771,28 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
+ case X86ISD::SHUFPS: // Handle all target specific shuffles
+ case X86ISD::SHUFPD:
+ case X86ISD::PUNPCKHBW:
+ case X86ISD::PUNPCKHWD:
+ case X86ISD::PUNPCKHDQ:
+ case X86ISD::PUNPCKHQDQ:
+ case X86ISD::UNPCKHPS:
+ case X86ISD::UNPCKHPD:
+ case X86ISD::PUNPCKLBW:
+ case X86ISD::PUNPCKLWD:
+ case X86ISD::PUNPCKLDQ:
+ case X86ISD::PUNPCKLQDQ:
+ case X86ISD::UNPCKLPS:
+ case X86ISD::UNPCKLPD:
+ case X86ISD::MOVHLPS:
+ case X86ISD::MOVLHPS:
+ case X86ISD::PSHUFD:
+ case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFLW:
+ case X86ISD::MOVSS:
+ case X86ISD::MOVSD:
+ case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
}
return SDValue();
@@ -9956,14 +10827,6 @@ bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
}
}
-static bool MayFoldLoad(SDValue Op) {
- return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
-}
-
-static bool MayFoldIntoStore(SDValue Op) {
- return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
-}
-
/// IsDesirableToPromoteOp - This method query the target whether it is
/// beneficial for dag combiner to promote the specified node. If true, it
/// should return the desired promotion type by reference.
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 4e4daa4bc5ca9..d2d9b28a03967 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -248,6 +248,44 @@ namespace llvm {
// PTEST - Vector bitwise comparisons
PTEST,
+ // TESTP - Vector packed fp sign bitwise comparisons
+ TESTP,
+
+ // Several flavors of instructions with vector shuffle behaviors.
+ PALIGN,
+ PSHUFD,
+ PSHUFHW,
+ PSHUFLW,
+ PSHUFHW_LD,
+ PSHUFLW_LD,
+ SHUFPD,
+ SHUFPS,
+ MOVDDUP,
+ MOVSHDUP,
+ MOVSLDUP,
+ MOVSHDUP_LD,
+ MOVSLDUP_LD,
+ MOVLHPS,
+ MOVLHPD,
+ MOVHLPS,
+ MOVHLPD,
+ MOVLPS,
+ MOVLPD,
+ MOVSD,
+ MOVSS,
+ UNPCKLPS,
+ UNPCKLPD,
+ UNPCKHPS,
+ UNPCKHPD,
+ PUNPCKLBW,
+ PUNPCKLWD,
+ PUNPCKLDQ,
+ PUNPCKLQDQ,
+ PUNPCKHBW,
+ PUNPCKHWD,
+ PUNPCKHDQ,
+ PUNPCKHQDQ,
+
// VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
// according to %al. An operator is needed so that this can be expanded
// with control flow.
@@ -265,7 +303,13 @@ namespace llvm {
ATOMXOR64_DAG,
ATOMAND64_DAG,
ATOMNAND64_DAG,
- ATOMSWAP64_DAG
+ ATOMSWAP64_DAG,
+
+ // Memory barrier
+ MEMBARRIER,
+ MFENCE,
+ SFENCE,
+ LFENCE
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
@@ -584,12 +628,19 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
/// space, and populates the address space and offset as
/// appropriate.
virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+ protected:
+ std::pair<const TargetRegisterClass*, uint8_t>
+ findRepresentativeClass(EVT VT) const;
+
private:
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -710,11 +761,16 @@ namespace llvm {
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+
+ // Utility functions to help LowerVECTOR_SHUFFLE
+ SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue
LowerFormalArguments(SDValue Chain,
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 42d0e7f9778ab..0884b61425e9e 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -73,11 +73,7 @@ def GetLo32XForm : SDNodeXForm<imm, [{
return getI32Imm((unsigned)N->getZExtValue());
}]>;
-def i64immSExt32 : PatLeaf<(i64 imm), [{
- // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // sign extended field.
- return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
-}]>;
+def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
def i64immZExt32 : PatLeaf<(i64 imm), [{
@@ -158,7 +154,7 @@ let isCall = 1 in
// FIXME: We need to teach codegen about single list of call-clobbered
// registers.
-let isCall = 1 in
+let isCall = 1, isCodeGenOnly = 1 in
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
@@ -168,7 +164,7 @@ let isCall = 1 in
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
Uses = [RSP] in {
- def WINCALL64pcrel32 : I<0xE8, RawFrm,
+ def WINCALL64pcrel32 : Ii32PCRel<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
"call\t$dst", []>,
Requires<[IsWin64]>;
@@ -182,7 +178,8 @@ let isCall = 1 in
}
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
@@ -216,9 +213,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP64pcrel32 : I<0xE9, RawFrm, (outs), (ins brtarget:$dst),
"jmp{q}\t$dst", []>;
def JMP64r : I<0xFF, MRM4r, (outs), (ins GR64:$dst), "jmp{q}\t{*}$dst",
- [(brind GR64:$dst)]>;
+ [(brind GR64:$dst)]>, Requires<[In64BitMode]>;
def JMP64m : I<0xFF, MRM4m, (outs), (ins i64mem:$dst), "jmp{q}\t{*}$dst",
- [(brind (loadi64 addr:$dst))]>;
+ [(brind (loadi64 addr:$dst))]>, Requires<[In64BitMode]>;
def FARJMP64 : RI<0xFF, MRM5m, (outs), (ins opaque80mem:$dst),
"ljmp{q}\t{*}$dst", []>;
}
@@ -246,7 +243,7 @@ def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, neverHasSideEffects = 1 in
def LEAVE64 : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>;
+ (outs), (ins), "leave", []>, Requires<[In64BitMode]>;
let Defs = [RSP], Uses = [RSP], neverHasSideEffects=1 in {
let mayLoad = 1 in {
def POP64r : I<0x58, AddRegFrm,
@@ -330,7 +327,7 @@ def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
// Fast system-call instructions
def SYSEXIT64 : RI<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB;
+ (outs), (ins), "sysexit", []>, TB, Requires<[In64BitMode]>;
//===----------------------------------------------------------------------===//
// Move Instructions...
@@ -374,6 +371,7 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store i64immSExt32:$src, addr:$dst)]>;
/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
+let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV64rr_TC : RI<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
@@ -388,7 +386,13 @@ let mayStore = 1 in
def MOV64mr_TC : RI<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[]>;
+}
+// FIXME: These definitions are utterly broken
+// Just leave them commented out for now because they're useless outside
+// of the large code model, and most compilers won't generate the instructions
+// in question.
+/*
def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{q}\t{$src, %rax|%rax, $src}", []>;
def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
@@ -397,6 +401,7 @@ def MOV64ao8 : RIi8<0xA2, RawFrm, (outs offset8:$dst), (ins),
"mov{q}\t{%rax, $dst|$dst, %rax}", []>;
def MOV64ao64 : RIi32<0xA3, RawFrm, (outs offset64:$dst), (ins),
"mov{q}\t{%rax, $dst|$dst, %rax}", []>;
+*/
// Moves to and from segment registers
def MOV64rs : RI<0x8C, MRMDestReg, (outs GR64:$dst), (ins SEGMENT_REG:$src),
@@ -1316,14 +1321,13 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
[]
>, TB;
-def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB,
- REX_W;
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
-def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86bt (loadi64 addr:$src1),
i64immSExt8:$src2))]>, TB;
@@ -1537,116 +1541,6 @@ def : Pat<(i64 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C64r)>;
//===----------------------------------------------------------------------===//
-// Conversion Instructions...
-//
-
-// f64 -> signed i64
-def CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
-def CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}", []>;
-def Int_CVTSD2SI64rr: RSDI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse2_cvtsd2si64 VR128:$src))]>;
-def Int_CVTSD2SI64rm: RSDI<0x2D, MRMSrcMem, (outs GR64:$dst),
- (ins f128mem:$src),
- "cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (int_x86_sse2_cvtsd2si64
- (load addr:$src)))]>;
-def CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR64:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint FR64:$src))]>;
-def CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f64mem:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
-def Int_CVTTSD2SI64rr: RSDI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse2_cvttsd2si64 VR128:$src))]>;
-def Int_CVTTSD2SI64rm: RSDI<0x2C, MRMSrcMem, (outs GR64:$dst),
- (ins f128mem:$src),
- "cvttsd2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse2_cvttsd2si64
- (load addr:$src)))]>;
-
-// Signed i64 -> f64
-def CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
- "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp GR64:$src))]>;
-def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
- "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
-def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
- "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtsi642sd VR128:$src1,
- GR64:$src2))]>;
-def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
- "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtsi642sd VR128:$src1,
- (loadi64 addr:$src2)))]>;
-} // Constraints = "$src1 = $dst"
-
-// Signed i64 -> f32
-def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
- "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp GR64:$src))]>;
-def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
- "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
- def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse_cvtsi642ss VR128:$src1,
- GR64:$src2))]>;
- def Int_CVTSI2SS64rm : RSSI<0x2A, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, i64mem:$src2),
- "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (int_x86_sse_cvtsi642ss VR128:$src1,
- (loadi64 addr:$src2)))]>;
-} // Constraints = "$src1 = $dst"
-
-// f32 -> signed i64
-def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
-def CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}", []>;
-def Int_CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse_cvtss2si64 VR128:$src))]>;
-def Int_CVTSS2SI64rm: RSSI<0x2D, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
- "cvtss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (int_x86_sse_cvtss2si64
- (load addr:$src)))]>;
-def CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint FR32:$src))]>;
-def CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst), (ins f32mem:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
-def Int_CVTTSS2SI64rr: RSSI<0x2C, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse_cvttss2si64 VR128:$src))]>;
-def Int_CVTTSS2SI64rm: RSSI<0x2C, MRMSrcMem, (outs GR64:$dst),
- (ins f32mem:$src),
- "cvttss2si{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst,
- (int_x86_sse_cvttss2si64 (load addr:$src)))]>;
-
// Descriptor-table support instructions
// LLDT is not interpreted specially in 64-bit mode because there is no sign
@@ -1726,6 +1620,14 @@ def MOV64FSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
// Atomic Instructions
//===----------------------------------------------------------------------===//
+// TODO: Get this to fold the constant into the instruction.
+let hasSideEffects = 1, Defs = [ESP] in
+def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
+ "lock\n\t"
+ "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
+ [(X86MemBarrierNoSSE GR64:$zero)]>,
+ Requires<[In64BitMode]>, LOCK;
+
let Defs = [RAX, EFLAGS], Uses = [RAX] in {
def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
"lock\n\t"
@@ -1772,7 +1674,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
// Optimized codegen when the non-memory output is not used.
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
+def LOCK_ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"lock\n\t"
"add{q}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
def LOCK_ADD64mi8 : RIi8<0x83, MRM0m, (outs),
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
new file mode 100644
index 0000000000000..d868773d2d690
--- /dev/null
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -0,0 +1,60 @@
+//====- X86InstrFMA.td - Describe the X86 Instruction Set --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes FMA (Fused Multiply-Add) instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FMA3 - Intel 3 operand Fused Multiply-Add instructions
+//===----------------------------------------------------------------------===//
+
+multiclass fma_rm<bits<8> opc, string OpcodeStr> {
+ def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+
+multiclass fma_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
+ string OpcodeStr, string PackTy> {
+ defm r132 : fma_rm<opc132, !strconcat(OpcodeStr, !strconcat("132", PackTy))>;
+ defm r213 : fma_rm<opc213, !strconcat(OpcodeStr, !strconcat("213", PackTy))>;
+ defm r231 : fma_rm<opc231, !strconcat(OpcodeStr, !strconcat("231", PackTy))>;
+}
+
+let isAsmParserOnly = 1 in {
+ // Fused Multiply-Add
+ defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
+ defm VFMADDPD : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd">, VEX_W;
+ defm VFMADDSUBPS : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps">;
+ defm VFMADDSUBPD : fma_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd">, VEX_W;
+ defm VFMSUBADDPS : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps">;
+ defm VFMSUBADDPD : fma_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd">, VEX_W;
+ defm VFMSUBPS : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps">;
+ defm VFMSUBPD : fma_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd">, VEX_W;
+
+ // Fused Negative Multiply-Add
+ defm VFNMADDPS : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps">;
+ defm VFNMADDPD : fma_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd">, VEX_W;
+ defm VFNMSUBPS : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps">;
+ defm VFNMSUBPD : fma_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd">, VEX_W;
+}
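// Expansion sketch for reviewers (assuming standard tblgen defm name
// concatenation; none of these record names are spelled out in the patch):
// a single line such as
//
//   defm VFMADDPS : fma_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps">;
//
// produces twelve instruction records, VFMADDPSr132r/m/rY/mY,
// VFMADDPSr213r/m/rY/mY and VFMADDPSr231r/m/rY/mY, carrying the mnemonics
// vfmadd132ps, vfmadd213ps and vfmadd231ps in 128- and 256-bit register
// and memory forms.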
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index da93de988d506..9c9bcc7d0b6a0 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -108,10 +108,6 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
}
-let isTerminator = 1 in
- let Defs = [FP0, FP1, FP2, FP3, FP4, FP5, FP6] in
- def FP_REG_KILL : I<0, Pseudo, (outs), (ins), "##FP_REG_KILL", []>;
-
// All FP Stack operations are represented with four instructions here. The
// first three instructions, generated by the instruction selector, use "RFP32",
// "RFP64" or "RFP80" registers: traditional register files to reference 32-bit,
@@ -157,7 +153,7 @@ def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR
def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR
}
-// FpIf32, FpIf64 - Floating Point Psuedo Instruction template.
+// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
// f64 instructions can use SSE2 and are predicated on FPStackf64 == !SSE2.
// f80 instructions cannot use SSE and use neither of these.
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index cc3fdf1efd7b6..79187e9a76d70 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -39,6 +39,7 @@ def MRM_E8 : Format<39>;
def MRM_F0 : Format<40>;
def MRM_F8 : Format<41>;
def MRM_F9 : Format<42>;
+def RawFrmImm16 : Format<43>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -210,7 +211,7 @@ class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
: I<o, F, outs, ins, asm, []> {}
-// FpI_ - Floating Point Psuedo Instruction template. Not Predicated.
+// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
let FPForm = fp;
@@ -224,13 +225,13 @@ class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
// Iseg32 - 16-bit segment selector, 32-bit offset
class Iseg16 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ list<dag> pattern> : X86Inst<o, f, Imm16, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern> : X86Inst<o, f, NoImm, outs, ins, asm> {
+ list<dag> pattern> : X86Inst<o, f, Imm32, outs, ins, asm> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -411,6 +412,20 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
Requires<[HasSSE42]>;
+// AVX Instruction Templates:
+// Instructions introduced in AVX (no SSE equivalent forms)
+//
+// AVX8I - AVX instructions with T8 and OpSize prefix.
+// AVXAIi8 - AVX instructions with TA, OpSize prefix and ImmT = Imm8.
+class AVX8I<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
+ Requires<[HasAVX]>;
+class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
+ Requires<[HasAVX]>;
+
// AES Instruction Templates:
//
// AES8I
@@ -425,6 +440,18 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
Requires<[HasAES]>;
+// CLMUL Instruction Templates
+class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
+
+// FMA3 Instruction Templates
+class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ OpSize, VEX_4V, Requires<[HasFMA3]>;
+
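// Illustrative tie-back to lib/Target/X86/X86InstrFMA.td above (a sketch,
// not a new definition): every def built from this FMA3 template inherits
// the 0F 38 opcode map (T8), the mandatory 66 prefix (OpSize) and the
// extra VEX.vvvv-encoded source operand (VEX_4V), so fma_rm only has to
// supply the opcode byte, the operand lists and the asm string:
//
//   def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
//                (ins VR128:$src1, VR128:$src2), asm, []>;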
// X86-64 Instruction templates...
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 71c4e8bc147fa..01149b699213d 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -117,9 +117,67 @@ def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
- SDTCisVT<1, v4f32>,
- SDTCisVT<2, v4f32>]>;
+ SDTCisVec<1>,
+ SDTCisSameAs<2, 1>]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
+
+// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
+// translated into one of the target nodes below during lowering.
+// Note: this is a work in progress...
+def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+
+def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+
+def SDTShuff2OpLdI : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisPtrTy<1>,
+ SDTCisInt<2>]>;
+
+def X86PAlign : SDNode<"X86ISD::PALIGN", SDTShuff3OpI>;
+
+def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;
+def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>;
+def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>;
+
+def X86PShufhwLd : SDNode<"X86ISD::PSHUFHW_LD", SDTShuff2OpLdI>;
+def X86PShuflwLd : SDNode<"X86ISD::PSHUFLW_LD", SDTShuff2OpLdI>;
+
+def X86Shufpd : SDNode<"X86ISD::SHUFPD", SDTShuff3OpI>;
+def X86Shufps : SDNode<"X86ISD::SHUFPS", SDTShuff3OpI>;
+
+def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>;
+def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>;
+def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>;
+
+def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>;
+def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
+
+def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
+def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>;
+def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
+def X86Movhlpd : SDNode<"X86ISD::MOVHLPD", SDTShuff2Op>;
+
+def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
+def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
+
+def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
+def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
+def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
+
+def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
+def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;
+def X86Punpckldq : SDNode<"X86ISD::PUNPCKLDQ", SDTShuff2Op>;
+def X86Punpcklqdq : SDNode<"X86ISD::PUNPCKLQDQ", SDTShuff2Op>;
+
+def X86Punpckhbw : SDNode<"X86ISD::PUNPCKHBW", SDTShuff2Op>;
+def X86Punpckhwd : SDNode<"X86ISD::PUNPCKHWD", SDTShuff2Op>;
+def X86Punpckhdq : SDNode<"X86ISD::PUNPCKHDQ", SDTShuff2Op>;
+def X86Punpckhqdq : SDNode<"X86ISD::PUNPCKHQDQ", SDTShuff2Op>;
//===----------------------------------------------------------------------===//
// SSE Complex Patterns
@@ -148,12 +206,13 @@ def sdmem : Operand<v2f64> {
// SSE pattern fragments
//===----------------------------------------------------------------------===//
+// 128-bit load pattern fragments
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit load pattern fragments
def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
@@ -174,6 +233,8 @@ def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
def alignedloadfsf64 : PatFrag<(ops node:$ptr),
(f64 (alignedload node:$ptr))>;
+
+// 128-bit aligned load pattern fragments
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64 : PatFrag<(ops node:$ptr),
@@ -183,7 +244,7 @@ def alignedloadv4i32 : PatFrag<(ops node:$ptr),
def alignedloadv2i64 : PatFrag<(ops node:$ptr),
(v2i64 (alignedload node:$ptr))>;
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit aligned load pattern fragments
def alignedloadv8f32 : PatFrag<(ops node:$ptr),
(v8f32 (alignedload node:$ptr))>;
def alignedloadv4f64 : PatFrag<(ops node:$ptr),
@@ -206,15 +267,20 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+
+// 128-bit memop pattern fragments
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
-// FIXME: move this to a more appropriate place after all AVX is done.
+// 256-bit memop pattern fragments
+def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
+def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
@@ -254,6 +320,7 @@ def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
return false;
}]>;
+// 128-bit bitconvert pattern fragments
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
@@ -261,6 +328,9 @@ def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+// 256-bit bitconvert pattern fragments
+def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+
def vzmovl_v2i64 : PatFrag<(ops node:$src),
(bitconvert (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
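// Usage sketch for the new 256-bit fragments (illustrative pattern only;
// the instruction named here is hypothetical): they slot into selection
// patterns exactly like their 128-bit counterparts, e.g.
//
//   def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))),
//             (SOME_AVX_UNPCK_INSTR VR256:$src1, addr:$src2)>;
//
// so the AVX .td files can write one pattern per fragment instead of
// open-coding the load and alignment checks.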
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ce471eadd78b2..5280940cf437b 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -235,6 +235,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::BT64ri8, X86::BT64mi8, 1, 0 },
{ X86::CALL32r, X86::CALL32m, 1, 0 },
{ X86::CALL64r, X86::CALL64m, 1, 0 },
+ { X86::WINCALL64r, X86::WINCALL64m, 1, 0 },
{ X86::CMP16ri, X86::CMP16mi, 1, 0 },
{ X86::CMP16ri8, X86::CMP16mi8, 1, 0 },
{ X86::CMP16rr, X86::CMP16mr, 1, 0 },
@@ -667,46 +668,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}
-bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case X86::MOV8rr:
- case X86::MOV8rr_NOREX:
- case X86::MOV16rr:
- case X86::MOV32rr:
- case X86::MOV64rr:
- case X86::MOV32rr_TC:
- case X86::MOV64rr_TC:
-
- // FP Stack register class copies
- case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
- case X86::MOV_Fp3264: case X86::MOV_Fp3280:
- case X86::MOV_Fp6432: case X86::MOV_Fp8032:
-
- // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64
- // copies are done with FsMOVAPSrr and FsMOVAPDrr.
-
- case X86::FsMOVAPSrr:
- case X86::FsMOVAPDrr:
- case X86::MOVAPSrr:
- case X86::MOVAPDrr:
- case X86::MOVDQArr:
- case X86::MMX_MOVQ64rr:
- assert(MI.getNumOperands() >= 2 &&
- MI.getOperand(0).isReg() &&
- MI.getOperand(1).isReg() &&
- "invalid register-register move instruction");
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SrcSubIdx = MI.getOperand(1).getSubReg();
- DstSubIdx = MI.getOperand(0).getSubReg();
- return true;
- }
-}
-
bool
X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -827,7 +788,7 @@ static bool isFrameStoreOpcode(int Opcode) {
unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameLoadOpcode(MI->getOpcode()))
- if (isFrameOperand(MI, 1, FrameIndex))
+ if (MI->getOperand(0).getSubReg() == 0 && isFrameOperand(MI, 1, FrameIndex))
return MI->getOperand(0).getReg();
return 0;
}
@@ -866,7 +827,8 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameStoreOpcode(MI->getOpcode()))
- if (isFrameOperand(MI, 0, FrameIndex))
+ if (MI->getOperand(X86::AddrNumOperands).getSubReg() == 0 &&
+ isFrameOperand(MI, 0, FrameIndex))
return MI->getOperand(X86::AddrNumOperands).getReg();
return 0;
}
@@ -1664,14 +1626,6 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
return !isPredicated(MI);
}
-// For purposes of branch analysis do not count FP_REG_KILL as a terminator.
-static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
- const X86InstrInfo &TII) {
- if (MI->getOpcode() == X86::FP_REG_KILL)
- return false;
- return TII.isUnpredicatedTerminator(MI);
-}
-
bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -1688,7 +1642,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Working from the bottom, when we see a non-terminator instruction, we're
// done.
- if (!isBrAnalysisUnpredicatedTerminator(I, *this))
+ if (!isUnpredicatedTerminator(I))
break;
// A terminator that isn't a branch can't easily be handled by this
@@ -1891,6 +1845,33 @@ static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
}
+// Try to copy between VR128/VR64 and GR64 registers.
+static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg) {
+ // SrcReg(VR128) -> DestReg(GR64)
+ // SrcReg(VR64) -> DestReg(GR64)
+ // SrcReg(GR64) -> DestReg(VR128)
+ // SrcReg(GR64) -> DestReg(VR64)
+
+ if (X86::GR64RegClass.contains(DestReg)) {
+ if (X86::VR128RegClass.contains(SrcReg)) {
+ // Copy from a VR128 register to a GR64 register.
+ return X86::MOVPQIto64rr;
+ } else if (X86::VR64RegClass.contains(SrcReg)) {
+ // Copy from a VR64 register to a GR64 register.
+ return X86::MOVSDto64rr;
+ }
+ } else if (X86::GR64RegClass.contains(SrcReg)) {
+ // Copy from a GR64 register to a VR128 register.
+ if (X86::VR128RegClass.contains(DestReg))
+ return X86::MOV64toPQIrr;
+ // Copy from a GR64 register to a VR64 register.
+ else if (X86::VR64RegClass.contains(DestReg))
+ return X86::MOV64toSDrr;
+ }
+
+ return 0;
+}
+
void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
@@ -1915,6 +1896,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = X86::MOVAPSrr;
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
+ else
+ Opc = CopyToFromAsymmetricReg(DestReg, SrcReg);
if (Opc) {
BuildMI(MBB, MI, DL, get(Opc), DestReg)
@@ -2046,6 +2029,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
+ assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
+ "Stack slot too small for store");
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
@@ -2130,8 +2115,9 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CalleeFrameSize += SlotSize;
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- &X86::VR128RegClass, &RI);
+ RC, &RI);
}
}
@@ -2161,8 +2147,9 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- &X86::VR128RegClass, &RI);
+ RC, &RI);
}
}
return true;
@@ -2423,10 +2410,17 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
+ Alignment = 32;
+ break;
case X86::V_SET0PS:
case X86::V_SET0PD:
case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
Alignment = 16;
break;
case X86::FsFLD0SD:
@@ -2453,12 +2447,22 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
} else if (Ops.size() != 1)
return NULL;
+ // Make sure the subregisters match.
+ // Otherwise we risk changing the size of the load.
+ if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
+ return NULL;
+
SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
case X86::V_SET0PS:
case X86::V_SET0PD:
case X86::V_SET0PI:
case X86::V_SETALLONES:
+ case X86::AVX_SET0PS:
+ case X86::AVX_SET0PD:
+ case X86::AVX_SET0PI:
+ case X86::AVX_SET0PSY:
+ case X86::AVX_SET0PDY:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
// Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
@@ -2485,10 +2489,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Create a constant-pool entry.
MachineConstantPool &MCP = *MF.getConstantPool();
const Type *Ty;
- if (LoadMI->getOpcode() == X86::FsFLD0SS)
+ unsigned Opc = LoadMI->getOpcode();
+ if (Opc == X86::FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (LoadMI->getOpcode() == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
+ else if (Opc == X86::AVX_SET0PSY || Opc == X86::AVX_SET0PDY)
+ Ty = VectorType::get(Type::getFloatTy(MF.getFunction()->getContext()), 8);
else
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4);
const Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ?
@@ -2991,561 +2998,6 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
return false;
}
-
-/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64
-/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
-/// size, and 3) use of X86-64 extended registers.
-unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
- unsigned REX = 0;
- const TargetInstrDesc &Desc = MI.getDesc();
-
- // Pseudo instructions do not need REX prefix byte.
- if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
- return 0;
- if (Desc.TSFlags & X86II::REX_W)
- REX |= 1 << 3;
-
- unsigned NumOps = Desc.getNumOperands();
- if (NumOps) {
- bool isTwoAddr = NumOps > 1 &&
- Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
-
- // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
- unsigned i = isTwoAddr ? 1 : 0;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (isX86_64NonExtLowByteReg(Reg))
- REX |= 0x40;
- }
- }
-
- switch (Desc.TSFlags & X86II::FormMask) {
- case X86II::MRMInitReg:
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= (1 << 0) | (1 << 2);
- break;
- case X86II::MRMSrcReg: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << 0;
- }
- break;
- }
- case X86II::MRMSrcMem: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- i = isTwoAddr ? 2 : 1;
- for (; i != NumOps; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
- i = isTwoAddr ? 1 : 0;
- if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
- REX |= 1 << 2;
- unsigned Bit = 0;
- for (; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << Bit;
- Bit++;
- }
- }
- break;
- }
- default: {
- if (isX86_64ExtendedReg(MI.getOperand(0)))
- REX |= 1 << 0;
- i = isTwoAddr ? 2 : 1;
- for (unsigned e = NumOps; i != e; ++i) {
- const MachineOperand& MO = MI.getOperand(i);
- if (isX86_64ExtendedReg(MO))
- REX |= 1 << 2;
- }
- break;
- }
- }
- }
- return REX;
-}
-
-/// sizePCRelativeBlockAddress - This method returns the size of a PC
-/// relative block address instruction
-///
-static unsigned sizePCRelativeBlockAddress() {
- return 4;
-}
-
-/// sizeGlobalAddress - Give the size of the emission of this global address
-///
-static unsigned sizeGlobalAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-/// sizeConstPoolAddress - Give the size of the emission of this constant
-/// pool address
-///
-static unsigned sizeConstPoolAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-/// sizeExternalSymbolAddress - Give the size of the emission of this external
-/// symbol
-///
-static unsigned sizeExternalSymbolAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-/// sizeJumpTableAddress - Give the size of the emission of this jump
-/// table address
-///
-static unsigned sizeJumpTableAddress(bool dword) {
- return dword ? 8 : 4;
-}
-
-static unsigned sizeConstant(unsigned Size) {
- return Size;
-}
-
-static unsigned sizeRegModRMByte(){
- return 1;
-}
-
-static unsigned sizeSIBByte(){
- return 1;
-}
-
-static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) {
- unsigned FinalSize = 0;
- // If this is a simple integer displacement that doesn't require a relocation.
- if (!RelocOp) {
- FinalSize += sizeConstant(4);
- return FinalSize;
- }
-
- // Otherwise, this is something that requires a relocation.
- if (RelocOp->isGlobal()) {
- FinalSize += sizeGlobalAddress(false);
- } else if (RelocOp->isCPI()) {
- FinalSize += sizeConstPoolAddress(false);
- } else if (RelocOp->isJTI()) {
- FinalSize += sizeJumpTableAddress(false);
- } else {
- llvm_unreachable("Unknown value to relocate!");
- }
- return FinalSize;
-}
-
-static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op,
- bool IsPIC, bool Is64BitMode) {
- const MachineOperand &Op3 = MI.getOperand(Op+3);
- int DispVal = 0;
- const MachineOperand *DispForReloc = 0;
- unsigned FinalSize = 0;
-
- // Figure out what sort of displacement we have to handle here.
- if (Op3.isGlobal()) {
- DispForReloc = &Op3;
- } else if (Op3.isCPI()) {
- if (Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal = 1;
- }
- } else if (Op3.isJTI()) {
- if (Is64BitMode || IsPIC) {
- DispForReloc = &Op3;
- } else {
- DispVal = 1;
- }
- } else {
- DispVal = 1;
- }
-
- const MachineOperand &Base = MI.getOperand(Op);
- const MachineOperand &IndexReg = MI.getOperand(Op+2);
-
- unsigned BaseReg = Base.getReg();
-
- // Is a SIB byte needed?
- if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
- IndexReg.getReg() == 0 &&
- (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) {
- if (BaseReg == 0) { // Just a displacement?
- // Emit special case [disp32] encoding
- ++FinalSize;
- FinalSize += getDisplacementFieldSize(DispForReloc);
- } else {
- unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg);
- if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) {
- // Emit simple indirect register encoding... [EAX] f.e.
- ++FinalSize;
- // Be pessimistic and assume it's a disp32, not a disp8
- } else {
- // Emit the most general non-SIB encoding: [REG+disp32]
- ++FinalSize;
- FinalSize += getDisplacementFieldSize(DispForReloc);
- }
- }
-
- } else { // We need a SIB byte, so start by outputting the ModR/M byte first
- assert(IndexReg.getReg() != X86::ESP &&
- IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
-
- bool ForceDisp32 = false;
- if (BaseReg == 0 || DispForReloc) {
- // Emit the normal disp32 encoding.
- ++FinalSize;
- ForceDisp32 = true;
- } else {
- ++FinalSize;
- }
-
- FinalSize += sizeSIBByte();
-
- // Do we need to output a displacement?
- if (DispVal != 0 || ForceDisp32) {
- FinalSize += getDisplacementFieldSize(DispForReloc);
- }
- }
- return FinalSize;
-}
-
-
-static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
- const TargetInstrDesc *Desc,
- bool IsPIC, bool Is64BitMode) {
-
- unsigned Opcode = Desc->Opcode;
- unsigned FinalSize = 0;
-
- // Emit the lock opcode prefix as needed.
- if (Desc->TSFlags & X86II::LOCK) ++FinalSize;
-
- // Emit segment override opcode prefix as needed.
- switch (Desc->TSFlags & X86II::SegOvrMask) {
- case X86II::FS:
- case X86II::GS:
- ++FinalSize;
- break;
- default: llvm_unreachable("Invalid segment!");
- case 0: break; // No segment override!
- }
-
- // Emit the repeat opcode prefix as needed.
- if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;
-
- // Emit the operand size opcode prefix as needed.
- if (Desc->TSFlags & X86II::OpSize) ++FinalSize;
-
- // Emit the address size opcode prefix as needed.
- if (Desc->TSFlags & X86II::AdSize) ++FinalSize;
-
- bool Need0FPrefix = false;
- switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::TB: // Two-byte opcode prefix
- case X86II::T8: // 0F 38
- case X86II::TA: // 0F 3A
- Need0FPrefix = true;
- break;
- case X86II::TF: // F2 0F 38
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::REP: break; // already handled.
- case X86II::XS: // F3 0F
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::XD: // F2 0F
- ++FinalSize;
- Need0FPrefix = true;
- break;
- case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
- case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
- ++FinalSize;
- break; // Two-byte opcode prefix
- default: llvm_unreachable("Invalid prefix!");
- case 0: break; // No prefix!
- }
-
- if (Is64BitMode) {
- // REX prefix
- unsigned REX = X86InstrInfo::determineREX(MI);
- if (REX)
- ++FinalSize;
- }
-
- // 0x0F escape code must be emitted just before the opcode.
- if (Need0FPrefix)
- ++FinalSize;
-
- switch (Desc->TSFlags & X86II::Op0Mask) {
- case X86II::T8: // 0F 38
- ++FinalSize;
- break;
- case X86II::TA: // 0F 3A
- ++FinalSize;
- break;
- case X86II::TF: // F2 0F 38
- ++FinalSize;
- break;
- }
-
- // If this is a two-address instruction, skip one of the register operands.
- unsigned NumOps = Desc->getNumOperands();
- unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
- CurOp++;
- else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
- // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
- --NumOps;
-
- switch (Desc->TSFlags & X86II::FormMask) {
- default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
- case X86II::Pseudo:
- // Remember the current PC offset, this is the PIC relocation
- // base address.
- switch (Opcode) {
- default:
- break;
- case TargetOpcode::INLINEASM: {
- const MachineFunction *MF = MI.getParent()->getParent();
- const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
- FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
- *MF->getTarget().getMCAsmInfo());
- break;
- }
- case TargetOpcode::DBG_LABEL:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::DBG_VALUE:
- break;
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case X86::FP_REG_KILL:
- break;
- case X86::MOVPC32r: {
- // This emits the "call" portion of this pseudo instruction.
- ++FinalSize;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- break;
- }
- }
- CurOp = NumOps;
- break;
- case X86II::RawFrm:
- ++FinalSize;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- if (MO.isMBB()) {
- FinalSize += sizePCRelativeBlockAddress();
- } else if (MO.isGlobal()) {
- FinalSize += sizeGlobalAddress(false);
- } else if (MO.isSymbol()) {
- FinalSize += sizeExternalSymbolAddress(false);
- } else if (MO.isImm()) {
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- } else {
- llvm_unreachable("Unknown RawFrm operand!");
- }
- }
- break;
-
- case X86II::AddRegFrm:
- ++FinalSize;
- ++CurOp;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64ri)
- dword = true;
- if (MO1.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO1.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO1.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO1.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
- }
- break;
-
- case X86II::MRMDestReg: {
- ++FinalSize;
- FinalSize += sizeRegModRMByte();
- CurOp += 2;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
- case X86II::MRMDestMem: {
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86::AddrNumOperands + 1;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
-
- case X86II::MRMSrcReg:
- ++FinalSize;
- FinalSize += sizeRegModRMByte();
- CurOp += 2;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
-
- case X86II::MRMSrcMem: {
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
- CurOp += X86::AddrNumOperands + 1;
- if (CurOp != NumOps) {
- ++CurOp;
- FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
- }
- break;
- }
-
- case X86II::MRM0r: case X86II::MRM1r:
- case X86II::MRM2r: case X86II::MRM3r:
- case X86II::MRM4r: case X86II::MRM5r:
- case X86II::MRM6r: case X86II::MRM7r:
- ++FinalSize;
- if (Desc->getOpcode() == X86::LFENCE ||
- Desc->getOpcode() == X86::MFENCE) {
- // Special handling of lfence and mfence;
- FinalSize += sizeRegModRMByte();
- } else if (Desc->getOpcode() == X86::MONITOR ||
- Desc->getOpcode() == X86::MWAIT) {
- // Special handling of monitor and mwait.
- FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode.
- } else {
- ++CurOp;
- FinalSize += sizeRegModRMByte();
- }
-
- if (CurOp != NumOps) {
- const MachineOperand &MO1 = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO1.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64ri32)
- dword = true;
- if (MO1.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO1.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO1.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO1.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
- }
- break;
-
- case X86II::MRM0m: case X86II::MRM1m:
- case X86II::MRM2m: case X86II::MRM3m:
- case X86II::MRM4m: case X86II::MRM5m:
- case X86II::MRM6m: case X86II::MRM7m: {
-
- ++FinalSize;
- FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86::AddrNumOperands;
-
- if (CurOp != NumOps) {
- const MachineOperand &MO = MI.getOperand(CurOp++);
- unsigned Size = X86II::getSizeOfImm(Desc->TSFlags);
- if (MO.isImm())
- FinalSize += sizeConstant(Size);
- else {
- bool dword = false;
- if (Opcode == X86::MOV64mi32)
- dword = true;
- if (MO.isGlobal()) {
- FinalSize += sizeGlobalAddress(dword);
- } else if (MO.isSymbol())
- FinalSize += sizeExternalSymbolAddress(dword);
- else if (MO.isCPI())
- FinalSize += sizeConstPoolAddress(dword);
- else if (MO.isJTI())
- FinalSize += sizeJumpTableAddress(dword);
- }
- }
- break;
-
- case X86II::MRM_C1:
- case X86II::MRM_C8:
- case X86II::MRM_C9:
- case X86II::MRM_E8:
- case X86II::MRM_F0:
- FinalSize += 2;
- break;
- }
-
- case X86II::MRMInitReg:
- ++FinalSize;
- // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
- FinalSize += sizeRegModRMByte();
- ++CurOp;
- break;
- }
-
- if (!Desc->isVariadic() && CurOp != NumOps) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Cannot determine size: " << MI;
- report_fatal_error(Msg.str());
- }
-
-
- return FinalSize;
-}
-
-
-unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const TargetInstrDesc &Desc = MI->getDesc();
- bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
- bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
- unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
- if (Desc.getOpcode() == X86::MOVPC32r)
- Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
- return Size;
-}
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
@@ -3573,7 +3025,7 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
// that we don't include here. We don't want to replace instructions selected
// by intrinsics.
static const unsigned ReplaceableInstrs[][3] = {
- //PackedInt PackedSingle PackedDouble
+ //PackedSingle PackedDouble PackedInt
{ X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
{ X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
{ X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
@@ -3589,6 +3041,22 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
{ X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
{ X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+ // AVX 128-bit support
+ { X86::VMOVAPSmr, X86::VMOVAPDmr, X86::VMOVDQAmr },
+ { X86::VMOVAPSrm, X86::VMOVAPDrm, X86::VMOVDQArm },
+ { X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
+ { X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
+ { X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
+ { X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
+ { X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
+ { X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
+ { X86::VANDPSrm, X86::VANDPDrm, X86::VPANDrm },
+ { X86::VANDPSrr, X86::VANDPDrr, X86::VPANDrr },
+ { X86::VORPSrm, X86::VORPDrm, X86::VPORrm },
+ { X86::VORPSrr, X86::VORPDrr, X86::VPORrr },
+ { X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
+ { X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
+ { X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
};
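// Reading guide for this table (a sketch; the consumer is presumably the
// SSE execution-domain fixup pass, which is outside this hunk): each row
// lists the same operation in its PackedSingle, PackedDouble and PackedInt
// encodings, so a domain change can rewrite, e.g.,
//
//   X86::VXORPSrr  ->  X86::VPXORrr
//
// without changing the computed value, only the execution domain the
// instruction runs in.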
// FIXME: Some shuffle and unpack instructions have equivalents in different
@@ -3627,7 +3095,7 @@ namespace {
/// global base register for x86-32.
struct CGBR : public MachineFunctionPass {
static char ID;
- CGBR() : MachineFunctionPass(&ID) {}
+ CGBR() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index ad0217adb4758..f33620641e88c 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -311,6 +311,12 @@ namespace X86II {
MRM_F0 = 40,
MRM_F8 = 41,
MRM_F9 = 42,
+
+ /// RawFrmImm16 - This is used for CALL FAR instructions, which have two
+ /// immediates, the first of which is a 16- or 32-bit immediate (specified by
+ /// the imm encoding) and the second is a 16-bit fixed value. In the AMD
+ /// manual, this operand is described as pntr16:32 and pntr16:16.
+ RawFrmImm16 = 43,
FormMask = 63,
@@ -439,27 +445,27 @@ namespace X86II {
//===------------------------------------------------------------------===//
// VEX - The opcode prefix used by AVX instructions
- VEX = 1ULL << 32,
+ VEX = 1U << 0,
// VEX_W - Has an opcode-specific functionality, but is used in the same
// way as REX_W is for regular SSE instructions.
- VEX_W = 1ULL << 33,
+ VEX_W = 1U << 1,
// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
// address instructions in SSE are represented as 3 address ones in AVX
// and the additional register is encoded in VEX_VVVV prefix.
- VEX_4V = 1ULL << 34,
+ VEX_4V = 1U << 2,
// VEX_I8IMM - Specifies that the last register used in an AVX instruction,
// must be encoded in the i8 immediate field. This usually happens in
// instructions with 4 operands.
- VEX_I8IMM = 1ULL << 35,
+ VEX_I8IMM = 1U << 3,
// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
// instruction uses 256-bit wide registers. This is usually auto-detected if
// a VR256 register is used, but some AVX instructions also have this field
// marked when using f256 memory references.
- VEX_L = 1ULL << 36
+ VEX_L = 1U << 4
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -522,11 +528,12 @@ namespace X86II {
case X86II::AddRegFrm:
case X86II::MRMDestReg:
case X86II::MRMSrcReg:
+ case X86II::RawFrmImm16:
return -1;
case X86II::MRMDestMem:
return 0;
case X86II::MRMSrcMem: {
- bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
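// Worth calling out (this follows directly from the hunk above): with the
// VEX bits rebased from 1ULL << 32.. down to 1U << 0.., they still occupy
// the top half of TSFlags, so every query site must shift before masking:
//
//   bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V;  // correct
//   bool Wrong     = TSFlags & X86II::VEX_4V;          // now tests bit 2
//
// Masking the 64-bit flags directly would silently test an unrelated low
// bit.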
@@ -610,12 +617,6 @@ public:
///
virtual const X86RegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
/// extension instruction. That is, it's like a copy where it's legal for the
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
@@ -826,16 +827,11 @@ public:
if (!MO.isReg()) return false;
return isX86_64ExtendedReg(MO.getReg());
}
- static unsigned determineREX(const MachineInstr &MI);
/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
/// higher) register? e.g. r8, xmm8, xmm13, etc.
static bool isX86_64ExtendedReg(unsigned RegNo);
- /// GetInstSize - Returns the size of the specified MachineInstr.
- ///
- virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
-
/// getGlobalBaseReg - Return a virtual register initialized with the
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 1efef5a80b1ba..09b7721a621d3 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -80,6 +80,21 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
+def SDT_X86MEMBARRIER : SDTypeProfile<0, 0, []>;
+def SDT_X86MEMBARRIERNoSSE : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
+def X86MemBarrier : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86MemBarrierNoSSE : SDNode<"X86ISD::MEMBARRIER", SDT_X86MEMBARRIERNoSSE,
+ [SDNPHasChain]>;
+def X86MFence : SDNode<"X86ISD::MFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86SFence : SDNode<"X86ISD::SFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+def X86LFence : SDNode<"X86ISD::LFENCE", SDT_X86MEMBARRIER,
+ [SDNPHasChain]>;
+
+
def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
@@ -222,7 +237,7 @@ def i16mem : X86MemOperand<"printi16mem">;
def i32mem : X86MemOperand<"printi32mem">;
def i64mem : X86MemOperand<"printi64mem">;
def i128mem : X86MemOperand<"printi128mem">;
-//def i256mem : X86MemOperand<"printi256mem">;
+def i256mem : X86MemOperand<"printi256mem">;
def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
@@ -333,15 +348,21 @@ def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;
def NoCMov : Predicate<"!Subtarget->hasCMov()">;
-def HasMMX : Predicate<"Subtarget->hasMMX()">;
-def HasSSE1 : Predicate<"Subtarget->hasSSE1()">;
-def HasSSE2 : Predicate<"Subtarget->hasSSE2()">;
-def HasSSE3 : Predicate<"Subtarget->hasSSE3()">;
-def HasSSSE3 : Predicate<"Subtarget->hasSSSE3()">;
-def HasSSE41 : Predicate<"Subtarget->hasSSE41()">;
-def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
-def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
+
+// FIXME: temporary hack to let codegen assert or generate poor code in case
+// no AVX version of the desired instructions is present; this is better for
+// incremental dev (without fallbacks it's easier to spot what's missing)
+def HasMMX : Predicate<"Subtarget->hasMMX() && !Subtarget->hasAVX()">;
+def HasSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">;
+def HasSSE2 : Predicate<"Subtarget->hasSSE2() && !Subtarget->hasAVX()">;
+def HasSSE3 : Predicate<"Subtarget->hasSSE3() && !Subtarget->hasAVX()">;
+def HasSSSE3 : Predicate<"Subtarget->hasSSSE3() && !Subtarget->hasAVX()">;
+def HasSSE41 : Predicate<"Subtarget->hasSSE41() && !Subtarget->hasAVX()">;
+def HasSSE42 : Predicate<"Subtarget->hasSSE42() && !Subtarget->hasAVX()">;
+def HasSSE4A : Predicate<"Subtarget->hasSSE4A() && !Subtarget->hasAVX()">;
+
def HasAVX : Predicate<"Subtarget->hasAVX()">;
+def HasCLMUL : Predicate<"Subtarget->hasCLMUL()">;
def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
@@ -393,9 +414,7 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
-def immSext8 : PatLeaf<(imm), [{
- return N->getSExtValue() == (int8_t)N->getSExtValue();
-}]>;
+def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
def i16immSExt8 : PatLeaf<(i16 immSext8)>;
def i32immSExt8 : PatLeaf<(i32 immSext8)>;
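// The inlined check moves into a helper; judging from the removed lines,
// immSext8(N) is assumed to keep the same round-trip test:
//
//   return N->getSExtValue() == (int8_t)N->getSExtValue();
//
// i.e. the immediate is representable as a sign-extended 8-bit value,
// which is what the i16/i32 PatLeafs below continue to rely on.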
@@ -559,9 +578,10 @@ def VASTART_SAVE_XMM_REGS : I<0, Pseudo,
// The main point of having a separate instruction is the extra unmodelled
// effects (compared to ordinary calls), like the stack pointer change.
-def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
- "# dynamic stack allocation",
- [(X86MingwAlloca)]>;
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
+ def MINGW_ALLOCA : I<0, Pseudo, (outs), (ins),
+ "# dynamic stack allocation",
+ [(X86MingwAlloca)]>;
}
// Nop
@@ -574,10 +594,14 @@ let neverHasSideEffects = 1 in {
}
// Trap
-def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>;
+let Uses = [EFLAGS] in {
+ def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+}
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
+ [(int_x86_int (i8 3))]>;
// FIXME: need to make sure that "int $3" matches int3
-def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
+ [(int_x86_int imm:$trap)]>;
def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
@@ -650,16 +674,16 @@ let Uses = [ECX], isBranch = 1, isTerminator = 1 in
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def JMP32r : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst",
- [(brind GR32:$dst)]>;
+ [(brind GR32:$dst)]>, Requires<[In32BitMode]>;
def JMP32m : I<0xFF, MRM4m, (outs), (ins i32mem:$dst), "jmp{l}\t{*}$dst",
- [(brind (loadi32 addr:$dst))]>;
+ [(brind (loadi32 addr:$dst))]>, Requires<[In32BitMode]>;
- def FARJMP16i : Iseg16<0xEA, RawFrm, (outs),
- (ins i16imm:$seg, i16imm:$off),
- "ljmp{w}\t$seg, $off", []>, OpSize;
- def FARJMP32i : Iseg32<0xEA, RawFrm, (outs),
- (ins i16imm:$seg, i32imm:$off),
- "ljmp{l}\t$seg, $off", []>;
+ def FARJMP16i : Iseg16<0xEA, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "ljmp{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARJMP32i : Iseg32<0xEA, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "ljmp{l}\t{$seg, $off|$off, $seg}", []>;
def FARJMP16m : I<0xFF, MRM5m, (outs), (ins opaque32mem:$dst),
"ljmp{w}\t{*}$dst", []>, OpSize;
@@ -670,9 +694,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
// Loop instructions
-def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
-def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
-def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
+def LOOP : Ii8PCRel<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>;
+def LOOPE : Ii8PCRel<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>;
+def LOOPNE : Ii8PCRel<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>;
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -695,12 +719,12 @@ let isCall = 1 in
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
"call\t{*}$dst", [(X86call (loadi32 addr:$dst))]>;
- def FARCALL16i : Iseg16<0x9A, RawFrm, (outs),
- (ins i16imm:$seg, i16imm:$off),
- "lcall{w}\t$seg, $off", []>, OpSize;
- def FARCALL32i : Iseg32<0x9A, RawFrm, (outs),
- (ins i16imm:$seg, i32imm:$off),
- "lcall{l}\t$seg, $off", []>;
+ def FARCALL16i : Iseg16<0x9A, RawFrmImm16, (outs),
+ (ins i16imm:$off, i16imm:$seg),
+ "lcall{w}\t{$seg, $off|$off, $seg}", []>, OpSize;
+ def FARCALL32i : Iseg32<0x9A, RawFrmImm16, (outs),
+ (ins i32imm:$off, i16imm:$seg),
+ "lcall{l}\t{$seg, $off|$off, $seg}", []>;
def FARCALL16m : I<0xFF, MRM3m, (outs), (ins opaque32mem:$dst),
"lcall{w}\t{*}$dst", []>, OpSize;
@@ -721,7 +745,8 @@ def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
// Tail call stuff.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ isCodeGenOnly = 1 in
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
@@ -756,7 +781,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
//
let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, neverHasSideEffects=1 in
def LEAVE : I<0xC9, RawFrm,
- (outs), (ins), "leave", []>;
+ (outs), (ins), "leave", []>, Requires<[In32BitMode]>;
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
@@ -934,7 +959,7 @@ def SYSRET : I<0x07, RawFrm,
def SYSENTER : I<0x34, RawFrm,
(outs), (ins), "sysenter", []>, TB;
def SYSEXIT : I<0x35, RawFrm,
- (outs), (ins), "sysexit", []>, TB;
+ (outs), (ins), "sysexit", []>, TB, Requires<[In32BitMode]>;
def WAIT : I<0x9B, RawFrm, (outs), (ins), "wait", []>;
@@ -1025,17 +1050,23 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
/// moffs8, moffs16 and moffs32 versions of moves. The immediate is an
/// absolute 32-bit memory offset (not PC-relative). These are only valid in
/// x86-32 mode.
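/// (Illustrative, not part of this patch: in AT&T syntax these match forms
/// like "movb 0x12345678, %al", with the address encoded directly in the
/// instruction instead of through a ModRM memory operand.)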
def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
- "mov{b}\t{$src, %al|%al, $src}", []>;
+ "mov{b}\t{$src, %al|%al, $src}", []>,
+ Requires<[In32BitMode]>;
def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
- "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ "mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize,
+ Requires<[In32BitMode]>;
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
- "mov{l}\t{$src, %eax|%eax, $src}", []>;
+ "mov{l}\t{$src, %eax|%eax, $src}", []>,
+ Requires<[In32BitMode]>;
def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
- "mov{b}\t{%al, $dst|$dst, %al}", []>;
+ "mov{b}\t{%al, $dst|$dst, %al}", []>,
+ Requires<[In32BitMode]>;
def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
- "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize;
+ "mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize,
+ Requires<[In32BitMode]>;
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
- "mov{l}\t{%eax, $dst|$dst, %eax}", []>;
+ "mov{l}\t{%eax, $dst|$dst, %eax}", []>,
+ Requires<[In32BitMode]>;
// Moves to and from segment registers
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
@@ -1087,6 +1118,7 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
[(store GR32:$src, addr:$dst)]>;
/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
+let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
@@ -1101,10 +1133,12 @@ let mayStore = 1 in
def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[]>;
+}
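+// (isCodeGenOnly keeps these _TC duplicates out of the assembler and
+// disassembler tables, since they reuse the plain MOV32 encodings.)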
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
// encoded when a REX prefix is present.
+let isCodeGenOnly = 1 in {
let neverHasSideEffects = 1 in
def MOV8rr_NOREX : I<0x88, MRMDestReg,
(outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
@@ -1118,6 +1152,7 @@ let mayLoad = 1,
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
+}
// Moves to and from debug registers
def MOV32rd : I<0x21, MRMDestReg, (outs GR32:$dst), (ins DEBUG_REG:$src),
@@ -1137,7 +1172,7 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
// Extra precision multiplication
-// AL is really implied by AX, by the registers in Defs must match the
+// AL is really implied by AX, but the registers in Defs must match the
// SDNode results (i8, i32).
let Defs = [AL,EFLAGS,AX], Uses = [AL] in
def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
@@ -3895,6 +3930,20 @@ def EH_RETURN : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
// Atomic support
//
+// Memory barriers
+
+// TODO: Get this to fold the constant into the instruction.
+def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
+ "lock\n\t"
+ "or{l}\t{$zero, $dst|$dst, $zero}",
+ []>, Requires<[In32BitMode]>, LOCK;
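+// A locked read-modify-write of memory like this is a full barrier on x86,
+// which presumably lets it stand in for MFENCE when SSE2 is unavailable.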
+
+let hasSideEffects = 1 in {
+def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
+ "#MEMBARRIER",
+ [(X86MemBarrier)]>, Requires<[HasSSE2]>;
+}
+
// Atomic swap. These are just normal xchg instructions. But since a memory
// operand is referenced, the atomicity is ensured.
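// (An xchg that references memory implicitly asserts the LOCK signal on x86,
// so no explicit LOCK prefix is required.)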
let Constraints = "$val = $dst" in {
@@ -4928,6 +4977,12 @@ include "X86Instr64bit.td"
include "X86InstrFragmentsSIMD.td"
//===----------------------------------------------------------------------===//
+// FMA - Fused Multiply-Add support (requires FMA)
+//===----------------------------------------------------------------------===//
+
+include "X86InstrFMA.td"
+
+//===----------------------------------------------------------------------===//
// XMM Floating point support (requires SSE / SSE2)
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 6cf7ac83620e8..11d4179534dce 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -164,7 +164,7 @@ let neverHasSideEffects = 1 in
def MMX_MOVQ2FR64rr: SSDIi8<0xD6, MRMSrcReg, (outs FR64:$dst), (ins VR64:$src),
"movq2dq\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVFR642Qrr: SSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
+def MMX_MOVFR642Qrr: SDIi8<0xD6, MRMSrcReg, (outs VR64:$dst), (ins FR64:$src),
"movdq2q\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ebe161b46bdcb..f5466f83f5192 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -142,7 +142,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
!strconcat(SSEVer, !strconcat("_",
!strconcat(OpcodeStr, FPSizeStr))))
RC:$src1, RC:$src2))], d>;
@@ -150,7 +150,7 @@ multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_",
!strconcat(SSEVer, !strconcat("_",
!strconcat(OpcodeStr, FPSizeStr))))
RC:$src1, (mem_frag addr:$src2)))], d>;
@@ -256,10 +256,10 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
let isAsmParserOnly = 1 in {
def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
- [(store FR32:$src, addr:$dst)]>, XS, VEX_4V;
+ [(store FR32:$src, addr:$dst)]>, XS, VEX;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>, XD, VEX_4V;
+ [(store FR64:$src, addr:$dst)]>, XD, VEX;
}
// Extract and store.
@@ -340,6 +340,15 @@ def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
}
+
+def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+
+def : Pat<(int_x86_avx_loadu_pd_256 addr:$src), (VMOVUPDYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
+ (VMOVUPDYmr addr:$dst, VR256:$src)>;
+
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -516,6 +525,14 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
[(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
}
+multiclass sse12_cvt_s_np<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ X86MemOperand x86memop, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ []>;
+}
+
multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d> {
@@ -526,35 +543,58 @@ multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
}
multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
- string asm> {
+ X86MemOperand x86memop, string asm> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
- asm, []>;
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src), asm, []>;
+ (ins DstRC:$src1, x86memop:$src),
+ !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}
let isAsmParserOnly = 1 in {
-defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
- "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
- "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
-defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
- "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS,
- VEX_4V;
-defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
- "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD,
- VEX_4V;
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD,
+ VEX, VEX_W;
+
+// The assembler can recognize rr 64-bit instructions by seeing an rxx
+// register, but the same isn't true when only memory operands are used, so
+// we provide separate "l" and "q" assembly forms to make the operand size
+// explicit where appropriate.
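+// For example, with a memory source "cvtsi2sd mem, %xmm0" does not say
+// whether the integer is 32- or 64-bit; "cvtsi2sdl" and "cvtsi2sdq" make the
+// size explicit.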
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss">, XS,
+ VEX_4V;
+defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, XS,
+ VEX_4V, VEX_W;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd">, XD,
+ VEX_4V;
+defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
+ VEX_4V;
+defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
+ VEX_4V, VEX_W;
}
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
+ "cvttss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
"cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
"cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
+ "cvtsi2ss{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
"cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
+ "cvtsi2sd{q}\t{$src, $dst|$dst, $src}">, XD, REX_W;
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@@ -570,10 +610,12 @@ multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
string asm> {
- def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [(set DstRC:$dst, (Int SrcRC:$src))]>;
- def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
}
multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
@@ -588,35 +630,79 @@ multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
- PatFrag ld_frag, string asm> {
+ PatFrag ld_frag, string asm, bit Is2Addr = 1> {
def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
- asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins DstRC:$src1, x86memop:$src2), asm,
+ (ins DstRC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
let isAsmParserOnly = 1 in {
defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
- VEX;
+ f32mem, load, "cvtss2si">, XS, VEX;
+ defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+ XS, VEX, VEX_W;
defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
- VEX;
+ f128mem, load, "cvtsd2si">, XD, VEX;
+ defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+ XD, VEX, VEX_W;
+
+  // FIXME: The asm matcher has a hack to ignore instructions with _Int and
+  // Int_ in their names. Get rid of this hack or rename the intrinsics; there
+  // are several instructions that only match with the intrinsic form, so why
+  // create duplicates just to let them be recognized by the assembler?
+ defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+ defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
}
defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+ f32mem, load, "cvtss2si">, XS;
+defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
+ f32mem, load, "cvtss2si{q}">, XS, REX_W;
defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
+ f128mem, load, "cvtsd2si">, XD;
+defm Int_CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
+ f128mem, load, "cvtsd2si">, XD, REX_W;
+defm CVTSD2SI64 : sse12_cvt_s_np<0x2D, VR128, GR64, f64mem, "cvtsd2si{q}">, XD,
+ REX_W;
+
+let isAsmParserOnly = 1 in {
+ defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+ defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ VEX_W;
+ defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+ defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ VEX_4V, VEX_W;
+}
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse_cvtsi2ss, i32mem, loadi32,
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+ "cvtsi2ss">, XS;
+ defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64,
+ "cvtsi2ss{q}">, XS, REX_W;
defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
int_x86_sse2_cvtsi2sd, i32mem, loadi32,
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD;
+ "cvtsi2sd">, XD;
+ defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64,
+ "cvtsi2sd">, XD, REX_W;
}
// Instructions below don't have an AVX form.
@@ -645,35 +731,48 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only
// Aliases for intrinsics
-let isAsmParserOnly = 1, Pattern = []<dag> in {
-defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
- int_x86_sse_cvttss2si, f32mem, load,
- "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
-defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
- int_x86_sse2_cvttsd2si, f128mem, load,
- "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
+let isAsmParserOnly = 1 in {
+defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si">, XS, VEX;
+defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si">, XS, VEX, VEX_W;
+defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+                                     f128mem, load, "cvttsd2si">, XD, VEX;
+defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+                                       int_x86_sse2_cvttsd2si64, f128mem, load,
+                                       "cvttsd2si">, XD, VEX, VEX_W;
}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
- f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
- XS;
+ f32mem, load, "cvttss2si">, XS;
+defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+ int_x86_sse_cvttss2si64, f32mem, load,
+ "cvttss2si{q}">, XS, REX_W;
defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
-                    f128mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
-                    XD;
+                                    f128mem, load, "cvttsd2si">, XD;
+defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
+                                      int_x86_sse2_cvttsd2si64, f128mem, load,
+                                      "cvttsd2si{q}">, XD, REX_W;
let isAsmParserOnly = 1, Pattern = []<dag> in {
-defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
- "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
- "cvtdq2ps\t{$src, $dst|$dst, $src}",
- SSEPackedSingle>, TB, VEX;
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
+ "cvtss2si\t{$src, $dst|$dst, $src}">, XS, VEX,
+ VEX_W;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, i256mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
}
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load /*dummy*/,
+ "cvtss2si{q}\t{$src, $dst|$dst, $src}">, XS, REX_W;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
                       SSEPackedSingle>, TB; /* PD SSE3 form is available */
}
@@ -701,13 +800,11 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
let isAsmParserOnly = 1 in
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load,
- "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
- XS, VEX_4V;
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
+ XS, VEX_4V;
let Constraints = "$src1 = $dst" in
defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
- int_x86_sse2_cvtsd2ss, f64mem, load,
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS;
+ int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
// Convert scalar single to scalar double
let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
@@ -806,6 +903,7 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(bitconvert (memopv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
+
// Convert packed single/double fp to doubleword
let isAsmParserOnly = 1 in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -964,11 +1062,11 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
let isAsmParserOnly = 1 in {
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
VEX, Requires<[HasAVX]>;
def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "cvtps2pd\t{$src, $dst|$dst, $src}",
+ "vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
VEX, Requires<[HasAVX]>;
@@ -1029,6 +1127,39 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
+// AVX 256-bit register conversion intrinsics
+// FIXME: Migrate SSE conversion intrinsic matching to use patterns like the
+// ones below whenever possible, to avoid declaring two versions of each one.
+def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
+ (VCVTDQ2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_ps_256 (memopv8i32 addr:$src)),
+ (VCVTDQ2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 VR256:$src),
+ (VCVTPD2PSYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2_ps_256 (memopv4f64 addr:$src)),
+ (VCVTPD2PSYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2dq_256 VR256:$src),
+ (VCVTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTPS2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 VR128:$src),
+ (VCVTPS2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvt_ps2_pd_256 (memopv4f32 addr:$src)),
+ (VCVTPS2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 VR256:$src),
+ (VCVTTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTTPD2DQYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 VR256:$src),
+ (VCVTTPS2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
+ (VCVTTPS2DQYrm addr:$src)>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Compare Instructions
//===----------------------------------------------------------------------===//
@@ -1193,16 +1324,14 @@ let isAsmParserOnly = 1 in {
"cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
"cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
SSEPackedDouble>, OpSize, VEX_4V;
- let Pattern = []<dag> in {
- defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- }
+ defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
@@ -1232,24 +1361,30 @@ def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
Domain d, bit IsConvertibleToThreeAddress = 0> {
- def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
- [(set VR128:$dst, (vt (shufp:$src3
- VR128:$src1, (mem_frag addr:$src2))))], d>;
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, f128mem:$src2, i8imm:$src3), asm,
+ [(set RC:$dst, (vt (shufp:$src3
+ RC:$src1, (mem_frag addr:$src2))))], d>;
let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
- def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
- [(set VR128:$dst,
- (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+ def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i8imm:$src3), asm,
+ [(set RC:$dst,
+ (vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
}
let isAsmParserOnly = 1 in {
- defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv4f32, SSEPackedSingle>, VEX_4V;
- defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
- "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+ defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, VEX_4V;
+ defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, VEX_4V;
+  defm VSHUFPD  : sse12_shuffle<VR128, f128mem, v2f64,
+                 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+                 memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+  defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+                 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+                 memopv4f64, SSEPackedDouble>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -1351,12 +1486,23 @@ let isAsmParserOnly = 1 in {
defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
"movmskpd", SSEPackedDouble>, OpSize,
VEX;
- // FIXME: merge with multiclass above when the intrinsics come.
- def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, VEX;
+ defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+
+ // Assembler Only
+ def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+ def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+ def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
"movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
+ VEX;
}
//===----------------------------------------------------------------------===//
@@ -1536,6 +1682,9 @@ let isCommutable = 0 in
///
/// These three forms can each be reg+reg or reg+mem.
///
+
+/// FIXME: once all 256-bit intrinsics are matched, clean up and refactor the
+/// classes below.
multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit Is2Addr = 1> {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
@@ -1565,7 +1714,7 @@ multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
- bit Is2Addr = 1> {
+ bit Is2Addr = 1> {
defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
!strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
@@ -1573,37 +1722,57 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
}
multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
- bit Is2Addr = 1> {
+ bit Is2Addr = 1> {
defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32,
+ !strconcat(OpcodeStr, "ps"), "sse", "_ps", f128mem, memopv4f32,
SSEPackedSingle, Is2Addr>, TB;
defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64,
+ !strconcat(OpcodeStr, "pd"), "sse2", "_pd", f128mem, memopv2f64,
SSEPackedDouble, Is2Addr>, TB, OpSize;
}
+multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
+ defm PSY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "ps"), "avx", "_ps_256", f256mem, memopv8f32,
+ SSEPackedSingle, 0>, TB;
+
+ defm PDY : sse12_fp_packed_int<opc, OpcodeStr, VR256,
+ !strconcat(OpcodeStr, "pd"), "avx", "_pd_256", f256mem, memopv4f64,
+ SSEPackedDouble, 0>, TB, OpSize;
+}
+
// Binary Arithmetic instructions
let isAsmParserOnly = 1 in {
defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>,
basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
let isCommutable = 0 in {
defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
+ basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
}
}
@@ -1668,20 +1837,20 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F32Int> {
def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
+ !strconcat(OpcodeStr,
"ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, XS, Requires<[HasAVX, OptForSize]>;
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(!strconcat("v", OpcodeStr),
- "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr,
+ "ss\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
}
/// sse1_fp_unop_p - SSE1 unops in packed form.
@@ -1715,6 +1884,16 @@ multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
}
+/// sse1_fp_unop_p_y_int - AVX 256-bit intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int VR256:$src))]>;
+ def PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V4F32Int (memopv8f32 addr:$src)))]>;
+}
/// sse2_fp_unop_s - SSE2 unops in scalar form.
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
@@ -1738,21 +1917,19 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
SDNode OpNode, Intrinsic F64Int> {
- def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f64mem:$src2),
- !strconcat(OpcodeStr,
- "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
- def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
- def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
+ def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst, $dst|$dst, $dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
}
/// sse2_fp_unop_p - SSE2 unops in vector forms.
@@ -1787,29 +1964,48 @@ multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
+/// sse2_fp_unop_p_y_int - AVX 256-bit intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
+ def PDYr_Int : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int VR256:$src))]>;
+ def PDYm_Int : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
+}
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
// Square root.
- defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
- sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
VEX_4V;
defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "vsqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_p_int<0x51, "vsqrt", int_x86_sse2_sqrt_pd>,
+ sse1_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_ps_256>,
+ sse2_fp_unop_p_y_int<0x51, "vsqrt", int_x86_avx_sqrt_pd_256>,
VEX;
// Reciprocal approximations. Note that these typically require refinement
// in order to obtain suitable precision.
- defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "vrsqrt", X86frsqrt,
int_x86_sse_rsqrt_ss>, VEX_4V;
defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
- sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX;
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y_int<0x52, "vrsqrt", int_x86_avx_rsqrt_ps_256>,
+ sse1_fp_unop_p_int<0x52, "vrsqrt", int_x86_sse_rsqrt_ps>, VEX;
- defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "vrcp", X86frcp, int_x86_sse_rcp_ss>,
VEX_4V;
defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
- sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX;
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y_int<0x53, "vrcp", int_x86_avx_rcp_ps_256>,
+ sse1_fp_unop_p_int<0x53, "vrcp", int_x86_sse_rcp_ps>, VEX;
}
// Square root.
@@ -1898,6 +2094,13 @@ let isAsmParserOnly = 1 in {
}
}
+def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
+ (VMOVNTDQYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_pd_256 addr:$dst, VR256:$src),
+ (VMOVNTPDYmr addr:$dst, VR256:$src)>;
+def : Pat<(int_x86_avx_movnt_ps_256 addr:$dst, VR256:$src),
+ (VMOVNTPSYmr addr:$dst, VR256:$src)>;
+
def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
@@ -1961,11 +2164,14 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
TB, Requires<[HasSSE1]>;
+def : Pat<(X86SFence), (SFENCE)>;
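+// (SFENCE orders only stores; it does not order loads.)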
// Alias instructions that map zero vector to pxor / xorp* for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below the way
+// X86MCInstLower does.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isCodeGenOnly = 1 in {
def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
@@ -1977,6 +2183,26 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
}
+// The same as done above but for AVX. The 128-bit versions are the same
+// instructions re-encoded; the 256-bit forms have no PI version.
+// FIXME: Change encoding to pseudo! This is blocked right now by the x86
+// JIT implementation, which does not expand the instructions below the way
+// X86MCInstLower does.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, Predicates = [HasAVX] in {
+def AVX_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v8f32 immAllZerosV))]>, VEX_4V;
+def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
+ [(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
+let ExeDomain = SSEPackedInt in
+def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
+
def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
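// (The remaining integer vector types map onto the same pxor-based V_SET0PI,
// since every zeroing idiom produces the identical all-zeros bit pattern.)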
@@ -2003,35 +2229,47 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//
+
let ExeDomain = SSEPackedInt in { // SSE integer instructions
let isAsmParserOnly = 1 in {
- let neverHasSideEffects = 1 in
- def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+ let neverHasSideEffects = 1 in {
+ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ }
+ def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+ def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
let canFoldAsLoad = 1, mayLoad = 1 in {
- def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>,
- VEX;
- def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
- XS, VEX, Requires<[HasAVX]>;
+ def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ }
}
let mayStore = 1 in {
- def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}",
- [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX;
- def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
- XS, VEX, Requires<[HasAVX]>;
+ def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ let Predicates = [HasAVX] in {
+ def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+ }
}
}
@@ -2084,6 +2322,10 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
} // ExeDomain = SSEPackedInt
+def : Pat<(int_x86_avx_loadu_dq_256 addr:$src), (VMOVDQUYrm addr:$src)>;
+def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src),
+ (VMOVDQUYmr addr:$dst, VR256:$src)>;
+
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Arithmetic Instructions
//===---------------------------------------------------------------------===//
@@ -2376,6 +2618,25 @@ let ExeDomain = SSEPackedInt in {
}
} // Constraints = "$src1 = $dst"
+let Predicates = [HasAVX] in {
+ def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psrl_dq VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+ def : Pat<(int_x86_sse2_psll_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSLLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(int_x86_sse2_psrl_dq_bs VR128:$src1, imm:$src2),
+ (v2i64 (VPSRLDQri VR128:$src1, imm:$src2))>;
+ def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)),
+ (v2f64 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
+
+  // Shift up / down and insert zeros.
+ def : Pat<(v2i64 (X86vshl VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSLLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+ def : Pat<(v2i64 (X86vshr VR128:$src, (i8 imm:$amt))),
+ (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
+}
+
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
(v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>;
@@ -2662,11 +2923,16 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
imm:$src2))]>;
// Insert
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm PINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
+ def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, OpSize, VEX_4V;
+}
let Constraints = "$src1 = $dst" in
- defm VPINSRW : sse2_pinsrw, TB, OpSize;
+ defm PINSRW : sse2_pinsrw, TB, OpSize, Requires<[HasSSE2]>;
} // ExeDomain = SSEPackedInt
@@ -2676,10 +2942,13 @@ let Constraints = "$src1 = $dst" in
let ExeDomain = SSEPackedInt in {
-let isAsmParserOnly = 1 in
-def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+let isAsmParserOnly = 1 in {
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
+}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
@@ -2939,18 +3208,20 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
// Instructions to match in the assembler
let isAsmParserOnly = 1 in {
-// This instructions is in fact an alias to movd with 64 bit dst
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+// Recognize "movd" with GR64 destination, but encode as a "movq"
+def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
}
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
-let isAsmParserOnly = 1 in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -2970,19 +3241,14 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
"lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+def : Pat<(X86LFence), (LFENCE)>;
+def : Pat<(X86MFence), (MFENCE)>;
+
// Pause. This "instruction" is encoded as "rep; nop", so even though it
// was introduced with SSE2, it's backward compatible.
def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 1)), (MFENCE)>;
-
// Alias instructions that map an all-ones vector to pcmpeqd for sse.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
@@ -3027,13 +3293,13 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// Convert Packed DW Integers to Packed Double FP
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -3041,6 +3307,17 @@ def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+// AVX 256-bit register conversion intrinsics
+def : Pat<(int_x86_avx_cvtdq2_pd_256 VR128:$src),
+ (VCVTDQ2PDYrr VR128:$src)>;
+def : Pat<(int_x86_avx_cvtdq2_pd_256 (memopv4i32 addr:$src)),
+ (VCVTDQ2PDYrm addr:$src)>;
+
+def : Pat<(int_x86_avx_cvt_pd2dq_256 VR256:$src),
+ (VCVTPD2DQYrr VR256:$src)>;
+def : Pat<(int_x86_avx_cvt_pd2dq_256 (memopv4f64 addr:$src)),
+ (VCVTPD2DQYrm addr:$src)>;
+
//===---------------------------------------------------------------------===//
// SSE3 - Move Instructions
//===---------------------------------------------------------------------===//
@@ -3057,9 +3334,20 @@ def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(memopv4f32 addr:$src), (undef)))]>;
}
+multiclass sse3_replicate_sfp_y<bits<8> op, PatFrag rep_frag,
+ string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
+}
+
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
-defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
-defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+ defm VMOVSHDUPY : sse3_replicate_sfp_y<0x16, movshdup, "vmovshdup">, VEX;
+ defm VMOVSLDUPY : sse3_replicate_sfp_y<0x12, movsldup, "vmovsldup">, VEX;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
@@ -3076,15 +3364,31 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
(undef))))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+multiclass sse3_replicate_dfp_y<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ []>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ // FIXME: Merge above classes when we have patterns for the ymm version
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+ defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
+}
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
// Move Unaligned Integer
-let isAsmParserOnly = 1 in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vlddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+ def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>, VEX;
+}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"lddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
@@ -3125,35 +3429,39 @@ let AddedComplexity = 20 in
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//
-multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> {
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
def rr : I<0xD0, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (Int VR128:$src1,
- VR128:$src2))]>;
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))]>;
def rm : I<0xD0, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (Int VR128:$src1,
- (memop addr:$src2)))]>;
-
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
}
let isAsmParserOnly = 1, Predicates = [HasAVX],
ExeDomain = SSEPackedDouble in {
- defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD,
- VEX_4V;
- defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize,
- VEX_4V;
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
+ f128mem, 0>, XD, VEX_4V;
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
+ f128mem, 0>, OpSize, VEX_4V;
+ defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
+ f256mem, 0>, XD, VEX_4V;
+ defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
+ f256mem, 0>, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
ExeDomain = SSEPackedDouble in {
- defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD;
- defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize;
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
+ f128mem>, XD;
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
+ f128mem>, TB, OpSize;
}
//===---------------------------------------------------------------------===//
@@ -3161,61 +3469,72 @@ let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
//===---------------------------------------------------------------------===//
// Horizontal ops
-class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
-class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
+multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic IntId, bit Is2Addr = 1> {
+ def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
- : S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ [(set RC:$dst, (vt (IntId RC:$src1, RC:$src2)))]>;
+
+ def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
+ [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
+}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
- def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
- def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
- def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
- def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
- def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
- def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
- def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
+ int_x86_avx_hadd_ps_256, 0>, VEX_4V;
+ defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem,
+ int_x86_avx_hadd_pd_256, 0>, VEX_4V;
+ defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
+ int_x86_avx_hsub_ps_256, 0>, VEX_4V;
+ defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem,
+ int_x86_avx_hsub_pd_256, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
- def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
- def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
- def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
- def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>;
- def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
- def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>;
- def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
- def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
+ defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem,
+ int_x86_sse3_hadd_ps>;
+ defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hadd_pd>;
+ defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem,
+ int_x86_sse3_hsub_ps>;
+ defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem,
+ int_x86_sse3_hsub_pd>;
}
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
-multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, PatFrag mem_frag128,
- Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_unop_rm_int_mm - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, Intrinsic IntId64> {
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>;
@@ -3224,7 +3543,11 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
(IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
+}
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag128, Intrinsic IntId128> {
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3240,26 +3563,28 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8,
- int_x86_ssse3_pabs_b,
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
int_x86_ssse3_pabs_b_128>, VEX;
- defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pabs_w,
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
int_x86_ssse3_pabs_w_128>, VEX;
- defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32,
- int_x86_ssse3_pabs_d,
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv4i32,
int_x86_ssse3_pabs_d_128>, VEX;
}
-defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
- int_x86_ssse3_pabs_b,
- int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pabs_w,
- int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
- int_x86_ssse3_pabs_d,
- int_x86_ssse3_pabs_d_128>;
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
+ int_x86_ssse3_pabs_b_128>,
+ SS3I_unop_rm_int_mm<0x1C, "pabsb", memopv8i8,
+ int_x86_ssse3_pabs_b>;
+
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
+ int_x86_ssse3_pabs_w_128>,
+ SS3I_unop_rm_int_mm<0x1D, "pabsw", memopv4i16,
+ int_x86_ssse3_pabs_w>;
+
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv4i32,
+ int_x86_ssse3_pabs_d_128>,
+ SS3I_unop_rm_int_mm<0x1E, "pabsd", memopv2i32,
+ int_x86_ssse3_pabs_d>;
//===---------------------------------------------------------------------===//
// SSSE3 - Packed Binary Operator Instructions
@@ -3267,26 +3592,9 @@ defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
- PatFrag mem_frag64, PatFrag mem_frag128,
- Intrinsic IntId64, Intrinsic IntId128,
+ PatFrag mem_frag128, Intrinsic IntId128,
bit Is2Addr = 1> {
let isCommutable = 1 in
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv8i8 addr:$src2))))]>;
-
- let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!if(Is2Addr,
@@ -3303,88 +3611,102 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
+multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, Intrinsic IntId64> {
+ let isCommutable = 1 in
+ def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
+ def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv8i8 addr:$src2))))]>;
+}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_w,
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32,
- int_x86_ssse3_phadd_d,
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv4i32,
int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
- defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_sw,
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv8i16,
int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_w,
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv8i16,
int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32,
- int_x86_ssse3_phsub_d,
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv4i32,
int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
- defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_sw,
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv8i16,
int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8,
- int_x86_ssse3_pmadd_ub_sw,
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv16i8,
int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8,
- int_x86_ssse3_pshuf_b,
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv16i8,
int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
- defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8,
- int_x86_ssse3_psign_b,
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv16i8,
int_x86_ssse3_psign_b_128, 0>, VEX_4V;
- defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16,
- int_x86_ssse3_psign_w,
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv8i16,
int_x86_ssse3_psign_w_128, 0>, VEX_4V;
- defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32,
- int_x86_ssse3_psign_d,
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv4i32,
int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pmul_hr_sw,
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_w,
- int_x86_ssse3_phadd_w_128>;
- defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32,
- int_x86_ssse3_phadd_d,
- int_x86_ssse3_phadd_d_128>;
- defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phadd_sw,
- int_x86_ssse3_phadd_sw_128>;
- defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_w,
- int_x86_ssse3_phsub_w_128>;
- defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32,
- int_x86_ssse3_phsub_d,
- int_x86_ssse3_phsub_d_128>;
- defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16,
- int_x86_ssse3_phsub_sw,
- int_x86_ssse3_phsub_sw_128>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8,
- int_x86_ssse3_pmadd_ub_sw,
- int_x86_ssse3_pmadd_ub_sw_128>;
- defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8,
- int_x86_ssse3_pshuf_b,
- int_x86_ssse3_pshuf_b_128>;
- defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8,
- int_x86_ssse3_psign_b,
- int_x86_ssse3_psign_b_128>;
- defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16,
- int_x86_ssse3_psign_w,
- int_x86_ssse3_psign_w_128>;
- defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32,
- int_x86_ssse3_psign_d,
- int_x86_ssse3_psign_d_128>;
-}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16,
- int_x86_ssse3_pmul_hr_sw,
- int_x86_ssse3_pmul_hr_sw_128>;
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv8i16,
+ int_x86_ssse3_phadd_w_128>,
+ SS3I_binop_rm_int_mm<0x01, "phaddw", memopv4i16,
+ int_x86_ssse3_phadd_w>;
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv4i32,
+ int_x86_ssse3_phadd_d_128>,
+ SS3I_binop_rm_int_mm<0x02, "phaddd", memopv2i32,
+ int_x86_ssse3_phadd_d>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv8i16,
+ int_x86_ssse3_phadd_sw_128>,
+ SS3I_binop_rm_int_mm<0x03, "phaddsw", memopv4i16,
+ int_x86_ssse3_phadd_sw>;
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv8i16,
+ int_x86_ssse3_phsub_w_128>,
+ SS3I_binop_rm_int_mm<0x05, "phsubw", memopv4i16,
+ int_x86_ssse3_phsub_w>;
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv4i32,
+ int_x86_ssse3_phsub_d_128>,
+ SS3I_binop_rm_int_mm<0x06, "phsubd", memopv2i32,
+ int_x86_ssse3_phsub_d>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv8i16,
+ int_x86_ssse3_phsub_sw_128>,
+ SS3I_binop_rm_int_mm<0x07, "phsubsw", memopv4i16,
+ int_x86_ssse3_phsub_sw>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw_128>,
+ SS3I_binop_rm_int_mm<0x04, "pmaddubsw", memopv8i8,
+ int_x86_ssse3_pmadd_ub_sw>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8,
+ int_x86_ssse3_pshuf_b_128>,
+ SS3I_binop_rm_int_mm<0x00, "pshufb", memopv8i8,
+ int_x86_ssse3_pshuf_b>;
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv16i8,
+ int_x86_ssse3_psign_b_128>,
+ SS3I_binop_rm_int_mm<0x08, "psignb", memopv8i8,
+ int_x86_ssse3_psign_b>;
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv8i16,
+ int_x86_ssse3_psign_w_128>,
+ SS3I_binop_rm_int_mm<0x09, "psignw", memopv4i16,
+ int_x86_ssse3_psign_w>;
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv4i32,
+ int_x86_ssse3_psign_d_128>,
+ SS3I_binop_rm_int_mm<0x0A, "psignd", memopv2i32,
+ int_x86_ssse3_psign_d>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv8i16,
+ int_x86_ssse3_pmul_hr_sw_128>,
+ SS3I_binop_rm_int_mm<0x0B, "pmulhrsw", memopv4i16,
+ int_x86_ssse3_pmul_hr_sw>;
}
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
@@ -3396,22 +3718,16 @@ def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
// SSSE3 - Packed Align Instruction Patterns
//===---------------------------------------------------------------------===//
-multiclass sse3_palign<string asm, bit Is2Addr = 1> {
+multiclass ssse3_palign_mm<string asm> {
def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2, i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>;
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
(ins VR64:$src1, i64mem:$src2, i8imm:$src3),
- !if(Is2Addr,
- !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(asm,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>;
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), []>;
+}
+multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
!if(Is2Addr,
@@ -3429,9 +3745,10 @@ multiclass sse3_palign<string asm, bit Is2Addr = 1> {
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in
- defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V;
+ defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
- defm PALIGN : sse3_palign<"palignr">;
+ defm PALIGN : ssse3_palign<"palignr">,
+ ssse3_palign_mm<"palignr">;
let AddedComplexity = 5 in {
@@ -3732,31 +4049,62 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
(Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>;
// Use movaps / movups for SSE integer load / store (one byte shorter).
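// (movaps/movups use bare 0F opcodes, while movdqa/movdqu need an extra
// prefix byte, 66 or F3 respectively, hence the one-byte saving.)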
-def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>;
-def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>;
-def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>;
-def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>;
-
-def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
-def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>;
+let Predicates = [HasSSE1] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (MOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (MOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (MOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (MOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (MOVUPSmr addr:$dst, VR128:$src)>;
+}
+
+// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter).
+let Predicates = [HasAVX] in {
+ def : Pat<(alignedloadv4i32 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv4i32 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+ def : Pat<(alignedloadv2i64 addr:$src),
+ (VMOVAPSrm addr:$src)>;
+ def : Pat<(loadv2i64 addr:$src),
+ (VMOVUPSrm addr:$src)>;
+
+ def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v2i64 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v4i32 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8i16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+}
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
@@ -3923,8 +4271,12 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+ def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrb\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, OpSize, VEX;
+}
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
@@ -4007,8 +4359,13 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
+ def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "vextractps \t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, OpSize, VEX;
+}
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
@@ -4131,80 +4488,84 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
- (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
+ (VINSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasAVX]>;
+def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
+ (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>,
+ Requires<[HasSSE41]>;
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//
-multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr,
- Intrinsic V4F32Int,
- Intrinsic V2F64Int> {
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int> {
  // Vector intrinsic operation, reg
def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
def PSm_Int : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+ [(set RC:$dst,
+ (V4F32Int (mem_frag32 addr:$src1),imm:$src2))]>,
TA, OpSize,
Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+ [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))]>,
OpSize;
// Vector intrinsic operation, mem
def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins f256mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+ [(set RC:$dst,
+ (V2F64Int (mem_frag64 addr:$src1),imm:$src2))]>,
OpSize;
}
-multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr> {
+multiclass sse41_fp_unop_rm_avx_p<bits<8> opcps, bits<8> opcpd,
+ RegisterClass RC, X86MemOperand x86memop, string OpcodeStr> {
  // Vector intrinsic operation, reg
def PSr : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
def PSm : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, TA, OpSize, Requires<[HasSSE41]>;
// Vector intrinsic operation, reg
def PDr : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins RC:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
// Vector intrinsic operation, mem
def PDm : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ (outs RC:$dst), (ins x86memop:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, OpSize;
@@ -4261,8 +4622,8 @@ multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
OpSize;
}
-multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr> {
+multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
// Intrinsic operation, reg.
def SSr : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
@@ -4295,24 +4656,90 @@ multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
// FP round - roundss, roundps, roundsd, roundpd
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
// Intrinsic form
- defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround",
- int_x86_sse41_round_ps, int_x86_sse41_round_pd>,
- VEX;
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
+ memopv4f32, memopv2f64,
+ int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>, VEX;
+ defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
+ memopv8f32, memopv4f64,
+ int_x86_avx_round_ps_256,
+ int_x86_avx_round_pd_256>, VEX;
defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
- int_x86_sse41_round_ss, int_x86_sse41_round_sd,
- 0>, VEX_4V;
+ int_x86_sse41_round_ss,
+ int_x86_sse41_round_sd, 0>, VEX_4V;
+
// Instructions for the assembler
- defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX;
- defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V;
+ defm VROUND : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR128, f128mem, "vround">,
+ VEX;
+ defm VROUNDY : sse41_fp_unop_rm_avx_p<0x08, 0x09, VR256, f256mem, "vround">,
+ VEX;
+ defm VROUND : sse41_fp_binop_rm_avx_s<0x0A, 0x0B, "vround">, VEX_4V;
}
-defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
+ memopv4f32, memopv2f64,
int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
let Constraints = "$src1 = $dst" in
defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
//===----------------------------------------------------------------------===//
+// SSE4.1 - Packed Bit Test
+//===----------------------------------------------------------------------===//
+
+// The ptest instruction is lowered to this form in X86ISelLowering, primarily
+// from the Intel intrinsic that corresponds to it.
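+//
+// As a rough C-level sketch (assuming the standard <smmintrin.h> intrinsic;
+// illustrative only), code like the following typically reaches these
+// patterns:
+//
+//   #include <smmintrin.h>
+//   int masked_all_zero(__m128i val, __m128i mask) {
+//     // _mm_testz_si128 compiles to ptest/vptest and returns ZF,
+//     // i.e. 1 iff (val & mask) == 0.
+//     return _mm_testz_si128(val, mask);
+//   }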
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize, VEX;
+
+def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
+ OpSize, VEX;
+def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS,(X86ptest VR256:$src1, (memopv4i64 addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS] in {
+def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
+ OpSize;
+def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
+ "ptest \t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (memopv4f32 addr:$src2)))]>,
+ OpSize;
+}
+
+// The bit test instructions below are AVX-only.
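+// (vtestps/vtestpd examine only the sign bits of the packed elements: ZF is
+// set when the AND of the sign bits is all zero, and CF when the ANDN is.)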
+multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
+ def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>, OpSize, VEX;
+ def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
+ OpSize, VEX;
+}
+
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
+defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
+defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
+defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, memopv4f64, v4f64>;
+}
+
+//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//
@@ -4431,79 +4858,104 @@ let Constraints = "$src1 = $dst" in
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Is2Addr = 1> {
+ Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
+ X86MemOperand x86memop, bit Is2Addr = 1> {
let isCommutable = 1 in
- def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
OpSize;
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, i32i8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
+ [(set RC:$dst,
+ (IntId RC:$src1,
+ (bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
OpSize;
}
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VBLENDPSY : SS41I_binop_rmi_int<0x0C, "vblendps",
+ int_x86_avx_blend_ps_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
+ defm VBLENDPDY : SS41I_binop_rmi_int<0x0D, "vblendpd",
+ int_x86_avx_blend_pd_256, VR256, memopv32i8, i256mem, 0>, VEX_4V;
defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
}
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
- 0>, VEX_4V;
+ VR128, memopv16i8, i128mem, 0>, VEX_4V;
+ defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
+ VR256, memopv32i8, i256mem, 0>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
- defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
- defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
- defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps,
+ VR128, memopv16i8, i128mem>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd,
+ VR128, memopv16i8, i128mem>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw,
+ VR128, memopv16i8, i128mem>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
+ VR128, memopv16i8, i128mem>;
}
- defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
- defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
+ VR128, memopv16i8, i128mem>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
+ VR128, memopv16i8, i128mem>;
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
- multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
- def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
-
- def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, VR128:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
- }
-}
-
-defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
-defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
-defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
+multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_frag, Intrinsic IntId> {
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2, RC:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set RC:$dst,
+ (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
+ RC:$src3))],
+ SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+}
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvpd>;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem,
+ memopv16i8, int_x86_sse41_blendvps>;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem,
+ memopv16i8, int_x86_sse41_pblendvb>;
+defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_pd_256>;
+defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem,
+ memopv32i8, int_x86_avx_blendv_ps_256>;
/// SS41I_ternary_int - SSE 4.1 ternary operator
let Uses = [XMM0], Constraints = "$src1 = $dst" in {
@@ -4529,30 +4981,6 @@ defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
-// ptest instruction we'll lower to this in X86ISelLowering primarily from
-// the intel intrinsic that corresponds to this.
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
-def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
- OpSize, VEX;
-def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "vptest\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
- OpSize, VEX;
-}
-
-let Defs = [EFLAGS] in {
-def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
- OpSize;
-def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
- "ptest \t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
- OpSize;
-}
-
let isAsmParserOnly = 1, Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
@@ -4603,17 +5031,20 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
//===----------------------------------------------------------------------===//
// Packed Compare Implicit Length Strings, Return Mask
-let Defs = [EFLAGS], usesCustomInserter = 1 in {
- def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "#PCMPISTRM128rr PSEUDO!",
+multiclass pseudo_pcmpistrm<string asm> {
+ def REG : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3), !strconcat(asm, "rr PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
- imm:$src3))]>, OpSize;
- def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "#PCMPISTRM128rm PSEUDO!",
+ imm:$src3))]>;
+ def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !strconcat(asm, "rm PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpistrm128
- VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize;
+ VR128:$src1, (load addr:$src2), imm:$src3))]>;
+}
+
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
@@ -4636,20 +5067,20 @@ let Defs = [XMM0, EFLAGS] in {
}
// Packed Compare Explicit Length Strings, Return Mask
-let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
- def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "#PCMPESTRM128rr PSEUDO!",
- [(set VR128:$dst,
- (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
-
- def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "#PCMPESTRM128rm PSEUDO!",
+multiclass pseudo_pcmpestrm<string asm> {
+ def REG : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5), !strconcat(asm, "rr PSEUDO"),
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
+ def MEM : Ii8<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5), !strconcat(asm, "rm PSEUDO"),
[(set VR128:$dst, (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
- OpSize;
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>;
+}
+
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128">, Requires<[HasSSE42]>;
+ defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}
let isAsmParserOnly = 1, Predicates = [HasAVX],
@@ -4941,3 +5372,579 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
imm:$src2))]>,
OpSize;
+
+//===----------------------------------------------------------------------===//
+// CLMUL Instructions
+//===----------------------------------------------------------------------===//
+
+// Only the AVX versions of the CLMUL instructions are described here.
+
+// Carry-less Multiplication instructions
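+// A carry-less multiply treats its operands as polynomials over GF(2), so
+// partial products are combined with XOR rather than ADD. For example,
+// 0b11 * 0b11 = 0b101, since (x + 1)(x + 1) = x^2 + 1 when 1 + 1 = 0.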
+let isAsmParserOnly = 1 in {
+def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>;
+
+// Assembler Only
+multiclass avx_vpclmul<string asm> {
+ def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+
+ def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
+defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">;
+defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
+defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
+defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
+
+} // isAsmParserOnly
+
+//===----------------------------------------------------------------------===//
+// AVX Instructions
+//===----------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1 in {
+
+// Load from memory and broadcast to all elements of the destination operand
+class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, Intrinsic Int> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (Int addr:$src))]>, VEX;
+
+def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
+ int_x86_avx_vbroadcastss>;
+def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem,
+ int_x86_avx_vbroadcastss_256>;
+def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
+ int_x86_avx_vbroadcast_sd_256>;
+def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
+ int_x86_avx_vbroadcastf128_pd_256>;
+
+// Insert packed floating-point values
+def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f128mem:$src2, i8imm:$src3),
+ "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Extract packed floating-point values
+def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst),
+ (ins VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR256:$src1, i8imm:$src2),
+ "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, VEX;
+
+// Conditional SIMD Packed Loads and Stores
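+// (An element is loaded or stored only when the sign bit of the corresponding
+// mask element is set; on a masked load, the remaining elements are zeroed.)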
+multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr,
+ Intrinsic IntLd, Intrinsic IntLd256,
+ Intrinsic IntSt, Intrinsic IntSt256,
+ PatFrag pf128, PatFrag pf256> {
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>,
+ VEX_4V;
+ def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
+ VEX_4V;
+ def mr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+ def Ymr : AVX8I<opc_mr, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src1, VR256:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps",
+ int_x86_avx_maskload_ps,
+ int_x86_avx_maskload_ps_256,
+ int_x86_avx_maskstore_ps,
+ int_x86_avx_maskstore_ps_256,
+ memopv4f32, memopv8f32>;
+defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd",
+ int_x86_avx_maskload_pd,
+ int_x86_avx_maskload_pd_256,
+ int_x86_avx_maskstore_pd,
+ int_x86_avx_maskstore_pd_256,
+ memopv2f64, memopv4f64>;
+
+// Permute Floating-Point Values
+multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
+ RegisterClass RC, X86MemOperand x86memop_f,
+ X86MemOperand x86memop_i, PatFrag f_frag, PatFrag i_frag,
+ Intrinsic IntVar, Intrinsic IntImm> {
+ def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V;
+ def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop_i:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntVar RC:$src1, (i_frag addr:$src2)))]>, VEX_4V;
+
+ def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm RC:$src1, imm:$src2))]>, VEX;
+ def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop_f:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (IntImm (f_frag addr:$src1), imm:$src2))]>, VEX;
+}
+
+defm VPERMILPS : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
+ memopv4f32, memopv4i32,
+ int_x86_avx_vpermilvar_ps,
+ int_x86_avx_vpermil_ps>;
+defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
+ memopv8f32, memopv8i32,
+ int_x86_avx_vpermilvar_ps_256,
+ int_x86_avx_vpermil_ps_256>;
+defm VPERMILPD : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
+ memopv2f64, memopv2i64,
+ int_x86_avx_vpermilvar_pd,
+ int_x86_avx_vpermil_pd>;
+defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
+ memopv4f64, memopv4i64,
+ int_x86_avx_vpermilvar_pd_256,
+ int_x86_avx_vpermil_pd_256>;
+
+def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, f256mem:$src2, i8imm:$src3),
+ "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ []>, VEX_4V;
+
+// Zero All YMM registers
+def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
+ [(int_x86_avx_vzeroall)]>, VEX, VEX_L, Requires<[HasAVX]>;
+
+// Zero the upper bits of the YMM registers
+def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
+ [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
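+// (vzeroupper is typically emitted before legacy SSE code or call boundaries,
+// since leaving the upper YMM bits dirty can incur an AVX/SSE transition
+// penalty on some implementations.)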
+
+} // isAsmParserOnly
+
+def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vinsertf128_si_256 VR256:$src1, VR128:$src2, imm:$src3),
+ (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_ps_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+def : Pat<(int_x86_avx_vextractf128_si_256 VR256:$src1, imm:$src2),
+ (VEXTRACTF128rr VR256:$src1, imm:$src2)>;
+
+def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+ (VBROADCASTF128 addr:$src)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256 VR256:$src1, VR256:$src2, imm:$src3),
+ (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>;
+
+def : Pat<(int_x86_avx_vperm2f128_ps_256
+ VR256:$src1, (memopv8f32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_pd_256
+ VR256:$src1, (memopv4f64 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+def : Pat<(int_x86_avx_vperm2f128_si_256
+ VR256:$src1, (memopv8i32 addr:$src2), imm:$src3),
+ (VPERM2F128rm VR256:$src1, addr:$src2, imm:$src3)>;
+
+//===----------------------------------------------------------------------===//
+// SSE Shuffle pattern fragments
+//===----------------------------------------------------------------------===//
+
+// This is part of a "work in progress" refactoring. The idea is that all
+// vector shuffles are going to be translated into target-specific nodes and
+// directly matched by the patterns below (which can be changed along the way).
+// The AVX versions of some, but not all, of them are described here; more
+// should come in the near future.
+
+// Shuffle with the PSHUFD instruction, folding loads. The first two patterns
+// match SSE2 loads, which are always promoted to v2i64. The last one should
+// match the SSE1 case, where the only legal load is v4f32, but PSHUFD is an
+// SSE2 instruction, so how did this ever work? Anyway, the pattern will
+// remain here until we investigate further.
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (VPSHUFDmi addr:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv2i64 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>;
+def : Pat<(v4i32 (X86PShufd (bc_v4i32 (memopv4f32 addr:$src1)),
+ (i8 imm:$imm))),
+ (PSHUFDmi addr:$src1, imm:$imm)>; // FIXME: has this ever worked?
+
+// Shuffle with PSHUFD instruction.
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (VPSHUFDri VR128:$src1, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
+ (PSHUFDri VR128:$src1, imm:$imm)>;
+
+// Shuffle with SHUFPD instruction.
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufps VR128:$src1,
+ (memopv2f64 addr:$src2), (i8 imm:$imm))),
+ (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2i64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Shufpd VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with SHUFPS instruction.
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1,
+ (memopv4f32 addr:$src2), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))),
+ (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>;
+
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>, Requires<[HasAVX]>;
+def : Pat<(v4i32 (X86Shufps VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>;
+
+// Shuffle with MOVHLPS instruction
+def : Pat<(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)),
+ (MOVHLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVDDUP instruction
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2f64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v4f32 (memopv2f64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (memopv2i64 addr:$src)),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (v2f64 (scalar_to_vector (loadf64 addr:$src)))),
+ (MOVDDUPrm addr:$src)>;
+
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
+def : Pat<(X86Movddup (bc_v2f64
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVDDUPrm addr:$src)>;
+
+// Shuffle with UNPCKLPS
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKLPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
+ (UNPCKLPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPS
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (VUNPCKHPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, (memopv4f32 addr:$src2))),
+ (UNPCKHPSrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (VUNPCKHPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
+ (UNPCKHPSrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKLPD
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+          (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
+          (UNPCKLPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
+ (UNPCKLPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with UNPCKHPD
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+          (VUNPCKHPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, (memopv2f64 addr:$src2))),
+          (UNPCKHPDrm VR128:$src1, addr:$src2)>;
+
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (VUNPCKHPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
+ (UNPCKHPDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLBW
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKLBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpcklbw VR128:$src1, VR128:$src2)),
+ (PUNPCKLBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLWD
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKLWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpcklwd VR128:$src1, VR128:$src2)),
+ (PUNPCKLWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLDQ
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKLDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckldq VR128:$src1, VR128:$src2)),
+ (PUNPCKLDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKLQDQ
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKLQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpcklqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKLQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHBW
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1,
+ (bc_v16i8 (memopv2i64 addr:$src2)))),
+ (PUNPCKHBWrm VR128:$src1, addr:$src2)>;
+def : Pat<(v16i8 (X86Punpckhbw VR128:$src1, VR128:$src2)),
+ (PUNPCKHBWrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHWD
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1,
+ (bc_v8i16 (memopv2i64 addr:$src2)))),
+ (PUNPCKHWDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v8i16 (X86Punpckhwd VR128:$src1, VR128:$src2)),
+ (PUNPCKHWDrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHDQ
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2)))),
+ (PUNPCKHDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Punpckhdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with PUNPCKHQDQ
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, (memopv2i64 addr:$src2))),
+ (PUNPCKHQDQrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Punpckhqdq VR128:$src1, VR128:$src2)),
+ (PUNPCKHQDQrr VR128:$src1, VR128:$src2)>;
+
+// Shuffle with MOVLHPS
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlhps VR128:$src1,
+ (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)),
+ (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>;
+
+// Shuffle with MOVLHPD
+def : Pat<(v2f64 (X86Movlhpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+// FIXME: Instead of X86Unpcklpd, there should be an X86Movlhpd here; the
+// problem arises during lowering, where the load fold can't be recognized
+// because the load has two uses through a bitcast. One use disappears at
+// isel time and the fold opportunity reappears.
+def : Pat<(v2f64 (X86Unpcklpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVHPDrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSS
+def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+ (MOVSSrr VR128:$src1, FR32:$src2)>;
+def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
+def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
+ (MOVSSrr (v4f32 VR128:$src1),
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// FIXME: Instead of an X86Movss there should be an X86Movlps here; the
+// problem arises during lowering, where the load fold can't be recognized
+// because the load has two uses through a bitcast. One use disappears at
+// isel time and the fold opportunity reappears.
+def : Pat<(X86Movss VR128:$src1,
+ (bc_v4i32 (v2i64 (load addr:$src2)))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVSD
+def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+ (MOVSDrr VR128:$src1, FR64:$src2)>;
+def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
+def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>;
+def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_sd))>;
+
+// Shuffle with MOVSHDUP
+def : Pat<(v4i32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSHDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movshdup VR128:$src)),
+ (MOVSHDUPrr VR128:$src)>;
+def : Pat<(X86Movshdup (memopv4f32 addr:$src)),
+ (MOVSHDUPrm addr:$src)>;
+
+// Shuffle with MOVSLDUP
+def : Pat<(v4i32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (bc_v4i32 (memopv2i64 addr:$src))),
+ (MOVSLDUPrm addr:$src)>;
+
+def : Pat<(v4f32 (X86Movsldup VR128:$src)),
+ (MOVSLDUPrr VR128:$src)>;
+def : Pat<(X86Movsldup (memopv4f32 addr:$src)),
+ (MOVSLDUPrm addr:$src)>;
+
+// Shuffle with PSHUFHW
+def : Pat<(v8i16 (X86PShufhwLd addr:$src, (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw VR128:$src, (i8 imm:$imm))),
+ (PSHUFHWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShufhw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFHWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PSHUFLW
+def : Pat<(v8i16 (X86PShuflwLd addr:$src, (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw VR128:$src, (i8 imm:$imm))),
+ (PSHUFLWri VR128:$src, imm:$imm)>;
+def : Pat<(v8i16 (X86PShuflw (bc_v8i16 (memopv2i64 addr:$src)), (i8 imm:$imm))),
+ (PSHUFLWmi addr:$src, imm:$imm)>;
+
+// Shuffle with PALIGN
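+// Note that the operands are intentionally swapped in the patterns below:
+// PALIGNR treats its first operand as the high half of the concatenated
+// pair, apparently the reverse of the X86PAlign node's operand order, so
+// $src2/$src1 map to the instruction's first/second operands.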
+def : Pat<(v1i64 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+def : Pat<(v2i32 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+def : Pat<(v4i16 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+def : Pat<(v8i8 (X86PAlign VR64:$src1, VR64:$src2, (i8 imm:$imm))),
+ (PALIGNR64rr VR64:$src2, VR64:$src1, imm:$imm)>;
+
+def : Pat<(v4i32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v4f32 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v8i16 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+def : Pat<(v16i8 (X86PAlign VR128:$src1, VR128:$src2, (i8 imm:$imm))),
+ (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
+
+// Shuffle with MOVLPS
+def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86Movlps VR128:$src1,
+ (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
+
+// Shuffle with MOVLPD
+def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2f64 (X86Movlpd VR128:$src1,
+ (scalar_to_vector (loadf64 addr:$src2)))),
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
+
+// Extra patterns to match stores with MOVHPS/PD and MOVLPS/PD
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhps VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPSmr addr:$dst, VR128:$src)>;
+def : Pat<(store (f64 (vector_extract
+ (v2f64 (X86Unpckhpd VR128:$src, (undef))), (iPTR 0))),addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+
+def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v4i32 (X86Movlps
+ (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1),
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
+
+def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
+def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),addr:$src1),
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 2b8720bac3438..36badb403e815 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -103,6 +103,9 @@ getNonexecutableStackSection(MCContext &Ctx) const {
}
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
+ if (Triple.getArch() == Triple::x86_64)
+ GlobalPrefix = "";
+
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 23b0666f5f30f..9564fe0b92d4c 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -365,7 +365,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
const TargetInstrDesc &Desc,
raw_ostream &OS) const {
bool HasVEX_4V = false;
- if (TSFlags & X86II::VEX_4V)
+ if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
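+ // The '>> 32' here and in the checks below reflects the assumed TSFlags
+ // layout: TSFlags is 64 bits wide, with the classic X86II flags in the low
+ // 32 bits and the VEX-related flags (VEX, VEX_4V, VEX_W, VEX_L, VEX_I8IMM)
+ // in the upper 32 bits, so they must be shifted down before masking with
+ // the X86II bit constants.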
// VEX_R: opcode extension equivalent to REX.R in
@@ -429,10 +429,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (TSFlags & X86II::OpSize)
VEX_PP = 0x01;
- if (TSFlags & X86II::VEX_W)
+ if ((TSFlags >> 32) & X86II::VEX_W)
VEX_W = 1;
- if (TSFlags & X86II::VEX_L)
+ if ((TSFlags >> 32) & X86II::VEX_L)
VEX_L = 1;
switch (TSFlags & X86II::Op0Mask) {
@@ -469,33 +469,39 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
unsigned NumOps = MI.getNumOperands();
unsigned CurOp = 0;
+ bool IsDestMem = false;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRMDestMem:
+ IsDestMem = true;
+ // The important info for the VEX prefix is never beyond the address
+ // registers. Don't check beyond that.
+ NumOps = CurOp = X86::AddrNumOperands;
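+ // For illustration (assuming the usual MRMDestMem operand layout): a store
+ // such as 'vmovaps [base + scale*index + disp], xmm1' has MI operands
+ // 0:base 1:scale 2:index 3:disp 4:segment 5:xmm1
+ // so X86::AddrNumOperands (5) already covers everything the VEX scan
+ // needs to look at.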
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
- case X86II::MRMDestMem:
- NumOps = CurOp = X86::AddrNumOperands;
case X86II::MRMSrcMem:
case X86II::MRMSrcReg:
if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_R = 0x0;
-
- // CurOp and NumOps are equal when VEX_R represents a register used
- // to index a memory destination (which is the last operand)
- CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
+ CurOp++;
if (HasVEX_4V) {
- VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ VEX_4V = getVEXRegisterEncoding(MI, IsDestMem ? CurOp-1 : CurOp);
CurOp++;
}
+ // To check only the operands that come before the memory address ones,
+ // restart the search from the beginning
+ if (IsDestMem)
+ CurOp = 0;
+
// If the last register should be encoded in the immediate field
// do not use any bit from VEX prefix to this register, ignore it
- if (TSFlags & X86II::VEX_I8IMM)
+ if ((TSFlags >> 32) & X86II::VEX_I8IMM)
NumOps--;
for (; CurOp != NumOps; ++CurOp) {
@@ -508,7 +514,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_X = 0x0;
}
break;
- default: // MRMDestReg, MRM0r-MRM7r
+ default: // MRMDestReg, MRM0r-MRM7r, RawFrm
+ if (!MI.getNumOperands())
+ break;
+
if (MI.getOperand(CurOp).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
VEX_B = 0;
@@ -524,7 +533,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
VEX_R = 0x0;
}
break;
- assert(0 && "Not implemented!");
}
// Emit segment override opcode prefix as needed.
@@ -793,9 +801,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = false;
- if (TSFlags & X86II::VEX)
+ if ((TSFlags >> 32) & X86II::VEX)
HasVEXPrefix = true;
- if (TSFlags & X86II::VEX_4V)
+ if ((TSFlags >> 32) & X86II::VEX_4V)
HasVEX_4V = true;
// Determine where the memory operand starts, if present.
@@ -819,6 +827,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
+
+ case X86II::RawFrmImm16:
+ EmitByte(BaseOpcode, CurByte, OS);
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ EmitImmediate(MI.getOperand(CurOp++), 2, FK_Data_2, CurByte, OS, Fixups);
+ break;
case X86II::AddRegFrm:
EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
@@ -833,10 +849,15 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMDestMem:
EmitByte(BaseOpcode, CurByte, OS);
+ SrcRegNum = CurOp + X86::AddrNumOperands;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
EmitMemModRMByte(MI, CurOp,
- GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)),
+ GetX86RegNum(MI.getOperand(SrcRegNum)),
TSFlags, CurByte, OS, Fixups);
- CurOp += X86::AddrNumOperands + 1;
+ CurOp = SrcRegNum + 1;
break;
case X86II::MRMSrcReg:
@@ -934,7 +955,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (CurOp != NumOps) {
// The last source register of a 4-operand instruction in AVX is encoded
// in bits[7:4] of an immediate byte, and bits[3:0] are ignored.
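+ // (e.g. 'vblendvps xmm1, xmm2, xmm3, xmm4' carries xmm4 in imm8[7:4].)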
- if (TSFlags & X86II::VEX_I8IMM) {
+ if ((TSFlags >> 32) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(CurOp++);
bool IsExtReg =
X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index e67fc06a6cd75..8c4620f921771 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -16,7 +16,6 @@
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MCAsmInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -29,21 +28,19 @@
#include "llvm/Type.h"
using namespace llvm;
-
-const X86Subtarget &X86MCInstLower::getSubtarget() const {
- return AsmPrinter.getSubtarget();
-}
+X86MCInstLower::X86MCInstLower(Mangler *mang, const MachineFunction &mf,
+ X86AsmPrinter &asmprinter)
+: Ctx(mf.getContext()), Mang(mang), MF(mf), TM(mf.getTarget()),
+ MAI(*TM.getMCAsmInfo()), AsmPrinter(asmprinter) {}
MachineModuleInfoMachO &X86MCInstLower::getMachOMMI() const {
- assert(getSubtarget().isTargetDarwin() &&"Can only get MachO info on darwin");
- return AsmPrinter.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+ return MF.getMMI().getObjFileInfo<MachineModuleInfoMachO>();
}
MCSymbol *X86MCInstLower::GetPICBaseSymbol() const {
- const TargetLowering *TLI = AsmPrinter.TM.getTargetLowering();
- return static_cast<const X86TargetLowering*>(TLI)->
- getPICBaseSymbol(AsmPrinter.MF, Ctx);
+ return static_cast<const X86TargetLowering*>(TM.getTargetLowering())->
+ getPICBaseSymbol(&MF, Ctx);
}
/// GetSymbolFromOperand - Lower an MO_GlobalAddress or MO_ExternalSymbol
@@ -56,7 +53,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
if (!MO.isGlobal()) {
assert(MO.isSymbol());
- Name += AsmPrinter.MAI->getGlobalPrefix();
+ Name += MAI.getGlobalPrefix();
Name += MO.getSymbolName();
} else {
const GlobalValue *GV = MO.getGlobal();
@@ -91,7 +88,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -105,7 +102,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -121,7 +118,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AsmPrinter.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(Mang->getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
Name.erase(Name.end()-5, Name.end());
@@ -178,7 +175,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
Expr = MCBinaryExpr::CreateSub(Expr,
MCSymbolRefExpr::Create(GetPICBaseSymbol(), Ctx),
Ctx);
- if (MO.isJTI() && AsmPrinter.MAI->hasSetDirective()) {
+ if (MO.isJTI() && MAI.hasSetDirective()) {
// If .set directive is supported, use it to reduce the number of
// relocations the assembler will generate for differences between
// local labels. This is only safe when the symbols are in the same
@@ -255,7 +252,13 @@ static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
}
/// \brief Simplify things like MOV32rm to MOV32o32a.
-static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
+static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
+ unsigned Opcode) {
+ // Don't make these simplifications in 64-bit mode; other assemblers don't
+ // perform them because they make the code larger.
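+ // A rough size sketch (32-bit mode, illustrative numbers): 'mov eax,
+ // [0x1000]' as MOV32rm needs opcode + ModRM + disp32 (6 bytes), while the
+ // MOV32o32a form is opcode + moffs32 (5 bytes). In 64-bit mode the moffs
+ // form takes a 64-bit address and gets larger, hence the early return.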
+ if (Printer.getSubtarget().is64Bit())
+ return;
+
bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
unsigned AddrBase = IsStore;
unsigned RegOp = IsStore ? 0 : 5;
@@ -336,7 +339,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
break;
case MachineOperand::MO_BlockAddress:
MCOp = LowerSymbolOperand(MO,
- AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
+ AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress()));
break;
}
@@ -377,12 +380,17 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::MMX_V_SET0: LowerUnaryToTwoAddr(OutMI, X86::MMX_PXORrr); break;
case X86::MMX_V_SETALLONES:
LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
- case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
- case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
- case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
+ case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
+ case X86::AVX_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::VXORPSrr); break;
+ case X86::AVX_SET0PSY: LowerUnaryToTwoAddr(OutMI, X86::VXORPSYrr); break;
+ case X86::AVX_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::VXORPDrr); break;
+ case X86::AVX_SET0PDY: LowerUnaryToTwoAddr(OutMI, X86::VXORPDYrr); break;
+ case X86::AVX_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::VPXORrr); break;
case X86::MOV16r0:
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV16r0 -> MOV32r0
@@ -393,12 +401,14 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
- // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+ // TAILJMPr64, [WIN]CALL64r, [WIN]CALL64pcrel32 - These instructions have
// register inputs modeled as normal uses instead of implicit uses. As such,
// truncate off all but the first operand (the callee). FIXME: Change isel.
case X86::TAILJMPr64:
case X86::CALL64r:
- case X86::CALL64pcrel32: {
+ case X86::CALL64pcrel32:
+ case X86::WINCALL64r:
+ case X86::WINCALL64pcrel32: {
unsigned Opcode = OutMI.getOpcode();
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
@@ -456,15 +466,13 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
// MOV64ao8, MOV64o8a
// XCHG16ar, XCHG32ar, XCHG64ar
case X86::MOV8mr_NOREX:
- case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break;
+ case X86::MOV8mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8ao8); break;
case X86::MOV8rm_NOREX:
- case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break;
- case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break;
- case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break;
- case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break;
- case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break;
- case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break;
- case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break;
+ case X86::MOV8rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(AsmPrinter, OutMI, X86::MOV32o32a); break;
case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
@@ -505,46 +513,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
}
-void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
- raw_ostream &O) {
- // Only the target-dependent form of DBG_VALUE should get here.
- // Referencing the offset and metadata as NOps-2 and NOps-1 is
- // probably portable to other targets; frame pointer location is not.
- unsigned NOps = MI->getNumOperands();
- assert(NOps==7);
- O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
- // cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
- if (V.getContext().isSubprogram())
- O << DISubprogram(V.getContext()).getDisplayName() << ":";
- O << V.getName();
- O << " <- ";
- // Frame address. Currently handles register +- offset only.
- O << '[';
- if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
- printOperand(MI, 0, O);
- else
- O << "undef";
- O << '+'; printOperand(MI, 3, O);
- O << ']';
- O << "+";
- printOperand(MI, NOps-2, O);
-}
-
-MachineLocation
-X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
- MachineLocation Location;
- assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
- // Frame address. Currently handles register +- offset only.
-
- if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
- return Location;
-}
-
void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- X86MCInstLower MCInstLowering(OutContext, Mang, *this);
+ X86MCInstLower MCInstLowering(Mang, *MF, *this);
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
@@ -555,6 +526,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
+ // Emit nothing here but a comment if we can.
+ case X86::Int_MemBarrier:
+ if (OutStreamer.hasRawTextSupport())
+ OutStreamer.EmitRawText(StringRef("\t#MEMBARRIER"));
+ return;
+
case X86::TAILJMPr:
case X86::TAILJMPd:
case X86::TAILJMPd64:
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/X86MCInstLower.h
index 9e5474fc81b33..539b09be6fd74 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h
+++ b/lib/Target/X86/X86MCInstLower.h
@@ -13,27 +13,30 @@
#include "llvm/Support/Compiler.h"
namespace llvm {
+ class MCAsmInfo;
class MCContext;
class MCInst;
class MCOperand;
class MCSymbol;
class MachineInstr;
+ class MachineFunction;
class MachineModuleInfoMachO;
class MachineOperand;
class Mangler;
+ class TargetMachine;
class X86AsmPrinter;
- class X86Subtarget;
/// X86MCInstLower - This class is used to lower an MachineInstr into an MCInst.
class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
MCContext &Ctx;
Mangler *Mang;
+ const MachineFunction &MF;
+ const TargetMachine &TM;
+ const MCAsmInfo &MAI;
X86AsmPrinter &AsmPrinter;
-
- const X86Subtarget &getSubtarget() const;
public:
- X86MCInstLower(MCContext &ctx, Mangler *mang, X86AsmPrinter &asmprinter)
- : Ctx(ctx), Mang(mang), AsmPrinter(asmprinter) {}
+ X86MCInstLower(Mangler *mang, const MachineFunction &MF,
+ X86AsmPrinter &asmprinter);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 5f31e00ebabd8..fedd49ebb5403 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -38,8 +38,15 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<bool>
+ForceStackAlign("force-align-stack",
+ cl::desc("Force align the stack to the minimum alignment"
+ " needed for the function."),
+ cl::init(false), cl::Hidden);
+
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
: X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
@@ -193,6 +200,12 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::DR7:
return 7;
+ // Pseudo index registers are equivalent to a "none"
+ // scaled index (See Intel Manual 2A, table 2-3)
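+ // For example, GNU as accepts 'leal (%eax,%eiz,1), %eax' purely as a way
+ // to force a SIB byte; %eiz/%riz encode as SIB.index = 0b100, i.e. no
+ // index register.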
+ case X86::EIZ:
+ case X86::RIZ:
+ return 4;
+
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
@@ -456,26 +469,29 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- bool requiresRealignment =
- RealignStack && ((MFI->getMaxAlignment() > StackAlign) ||
- F->hasFnAttr(Attribute::StackAlignment));
+ bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
// FIXME: Currently we don't support stack realignment for functions with
// variable-sized allocas.
- // FIXME: Temporary disable the error - it seems to be too conservative.
+ // FIXME: It's more complicated than this...
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
"Stack realignment in presense of dynamic allocas is not supported");
-
- return (requiresRealignment && !MFI->hasVarSizedObjects());
+
+ // If we've requested that we force align the stack do so now.
+ if (ForceStackAlign)
+ return canRealignStack(MF);
+
+ return requiresRealignment && canRealignStack(MF);
}
-bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
+bool X86RegisterInfo::hasReservedCallFrame(const MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
-bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
- int &FrameIdx) const {
+bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
if (Reg == FramePtr && hasFP(MF)) {
FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
return true;
@@ -610,10 +626,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const{
+ int SPAdj, RegScavenger *RS) const{
assert(SPAdj == 0 && "Unexpected");
unsigned i = 0;
@@ -660,7 +675,6 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
uint64_t Offset = FIOffset + (uint64_t)MI.getOperand(i+3).getOffset();
MI.getOperand(i+3).setOffset(Offset);
}
- return 0;
}
void
@@ -750,7 +764,7 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
-/// mergeSPUpdatesUp - Merge two stack-manipulating instructions lower iterator.
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions using the lower iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
@@ -901,6 +915,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
bool HasFP = hasFP(MF);
DebugLoc DL;
+ // If we're forcing a stack realignment, we can't rely on the frame info
+ // alone; we also need to know the ABI stack alignment in case we make a
+ // call. Otherwise just make sure we have some alignment - we'll go with
+ // the minimum SlotSize.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else if (MaxAlign < SlotSize)
+ MaxAlign = SlotSize;
+ }
+
// Add RETADDR move area to callee saved frame size.
int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
if (TailCallReturnAddrDelta < 0)
@@ -979,7 +1004,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark the place where EBP/RBP was saved.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
// Define the current CFA rule to use the provided offset.
if (StackSize) {
@@ -1007,7 +1032,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);
// Define the current CFA to use the EBP/RBP register.
MachineLocation FPDst(FramePtr);
@@ -1047,7 +1072,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!HasFP && needsFrameMoves) {
// Mark callee-saved push instruction.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
// Define the current CFA rule to use the provided offset.
unsigned Ptr = StackSize ?
@@ -1062,7 +1087,17 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
DL = MBB.findDebugLoc(MBBI);
// Adjust stack pointer: ESP -= numbytes.
- if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
+
+ // Windows and cygwin/mingw require a prologue helper routine when allocating
+ // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
+ // uses __alloca. __alloca and the 32-bit version of __chkstk will probe
+ // the stack and adjust the stack pointer in one go. The 64-bit version
+ // of __chkstk is only responsible for probing the stack. The 64-bit
+ // prologue is responsible for adjusting the stack pointer. Touching the
+ // stack at 4K increments is necessary to ensure that the guard pages used
+ // by the OS virtual memory manager are allocated in correct sequence.
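+ // Illustrative sequence for a 32-bit Windows function allocating 8192
+ // bytes (mirrors the BuildMI calls below):
+ // movl $8192, %eax
+ // calll _chkstk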
+ if (NumBytes >= 4096 &&
+ (Subtarget->isTargetCygMing() || Subtarget->isTargetWin32())) {
// Check, whether EAX is livein for this function.
bool isEAXAlive = false;
for (MachineRegisterInfo::livein_iterator
@@ -1073,16 +1108,16 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
Reg == X86::AH || Reg == X86::AL);
}
- // Function prologue calls _alloca to probe the stack when allocating more
- // than 4k bytes in one go. Touching the stack at 4K increments is necessary
- // to ensure that the guard pages used by the OS virtual memory manager are
- // allocated in correct sequence.
+
+ const char *StackProbeSymbol =
+ Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
if (!isEAXAlive) {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
} else {
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
@@ -1093,8 +1128,9 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
.addImm(NumBytes - 4);
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
// Restore EAX
MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
@@ -1119,7 +1155,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if ((NumBytes || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addSym(Label);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
if (!HasFP && NumBytes) {
// Define the current CFA rule to use the provided offset.
@@ -1172,6 +1208,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
+ // If we're forcing a stack realignment, we can't rely on the frame info
+ // alone; we also need to know the ABI stack alignment in case we make a
+ // call. Otherwise just make sure we have some alignment - we'll go with
+ // the minimum.
+ if (ForceStackAlign) {
+ if (MFI->hasCalls())
+ MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+ else
+ MaxAlign = MaxAlign ? MaxAlign : 4;
+ }
+
if (hasFP(MF)) {
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
@@ -1519,7 +1566,7 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
namespace {
struct MSAH : public MachineFunctionPass {
static char ID;
- MSAH() : MachineFunctionPass(&ID) {}
+ MSAH() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index d852bcd2011c4..527df05c58fce 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -117,18 +117,17 @@ public:
bool needsStackRealignment(const MachineFunction &MF) const;
- bool hasReservedCallFrame(MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const;
- bool hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
int &FrameIdx) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator MI,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 9f0382e3fae91..95269b15760e0 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -241,6 +241,10 @@ let Namespace = "X86" in {
def CR6 : Register<"cr6">;
def CR7 : Register<"cr7">;
def CR8 : Register<"cr8">;
+
+ // Pseudo index registers
+ def EIZ : Register<"eiz">;
+ def RIZ : Register<"riz">;
}
@@ -804,7 +808,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
}];
}
-def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
+def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
@@ -829,4 +833,15 @@ def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
let CopyCost = -1; // Don't allow copying of status registers.
+
+ // EFLAGS is not allocatable.
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ CCRClass::iterator
+ CCRClass::allocation_order_end(const MachineFunction &MF) const {
+ return allocation_order_begin(MF);
+ }
+ }];
}
diff --git a/lib/Target/X86/X86ShuffleDecode.h b/lib/Target/X86/X86ShuffleDecode.h
new file mode 100644
index 0000000000000..df040520bc8f5
--- /dev/null
+++ b/lib/Target/X86/X86ShuffleDecode.h
@@ -0,0 +1,155 @@
+//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define several functions to decode x86 specific shuffle semantics into a
+// generic vector mask.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86_SHUFFLE_DECODE_H
+#define X86_SHUFFLE_DECODE_H
+
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Vector Mask Decoding
+//===----------------------------------------------------------------------===//
+
+enum {
+ SM_SentinelZero = ~0U
+};
+
+static inline
+void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Default to copying the dest value.
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+
+ // Decode the immediate.
+ unsigned ZMask = Imm & 15;
+ unsigned CountD = (Imm >> 4) & 3;
+ unsigned CountS = (Imm >> 6) & 3;
+
+ // CountS selects which input element to use.
+ unsigned InVal = 4+CountS;
+ // CountD specifies which element of the destination to update.
+ ShuffleMask[CountD] = InVal;
+ // ZMask zaps values, potentially overriding the CountD elt.
+ if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
+ if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
+ if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
+ if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
+}
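+// A worked example of the decode above: Imm = 0x40 gives ZMask = 0,
+// CountD = 0, CountS = 1, so element 1 of the source (index 4+1 = 5)
+// replaces element 0 of the destination and the mask becomes <5, 1, 2, 3>.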
+
+// <3,1> or <6,7,2,3>
+static void DecodeMOVHLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(NElts+i);
+
+ for (unsigned i = NElts/2; i != NElts; ++i)
+ ShuffleMask.push_back(i);
+}
+
+// <0,2> or <0,1,4,5>
+static void DecodeMOVLHPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(i);
+
+ for (unsigned i = 0; i != NElts/2; ++i)
+ ShuffleMask.push_back(NElts+i);
+}
+
+static void DecodePSHUFMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+}
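+// Worked example: for NElts = 4 and Imm = 0x1B (0b00011011), successive
+// (Imm % 4, Imm /= 4) steps yield 3, 2, 1, 0, i.e. the element-reversing
+// mask produced by 'pshufd $0x1b'.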
+
+static void DecodePSHUFHWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ ShuffleMask.push_back(0);
+ ShuffleMask.push_back(1);
+ ShuffleMask.push_back(2);
+ ShuffleMask.push_back(3);
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back(4+(Imm & 3));
+ Imm >>= 2;
+ }
+}
+
+static void DecodePSHUFLWMask(unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != 4; ++i) {
+ ShuffleMask.push_back((Imm & 3));
+ Imm >>= 2;
+ }
+ ShuffleMask.push_back(4);
+ ShuffleMask.push_back(5);
+ ShuffleMask.push_back(6);
+ ShuffleMask.push_back(7);
+}
+
+static void DecodePUNPCKLMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i);
+ ShuffleMask.push_back(i+NElts);
+ }
+}
+
+static void DecodePUNPCKHMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2);
+ ShuffleMask.push_back(i+NElts+NElts/2);
+ }
+}
+
+static void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ // Part that reads from dest.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts);
+ Imm /= NElts;
+ }
+ // Part that reads from src.
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(Imm % NElts + NElts);
+ Imm /= NElts;
+ }
+}
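+// Worked example: NElts = 4, Imm = 0x4E (0b01001110) decodes to <2, 3, 4, 5>:
+// the low two selectors read elements 2 and 3 of dest, and the high two read
+// elements 0 and 1 of src (offset by NElts).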
+
+static void DecodeUNPCKHPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i+NElts/2); // Reads from dest
+ ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src
+ }
+}
+
+
+/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
+/// etc. NElts indicates the number of elements in the vector, allowing it to
+/// handle different data types and vector widths.
+static void DecodeUNPCKLPMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ for (unsigned i = 0; i != NElts/2; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest
+ ShuffleMask.push_back(i+NElts); // Reads from src
+ }
+}
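+// For instance, DecodeUNPCKLPMask(4, M) appends <0, 4, 1, 5> - the
+// low-half interleave that unpcklps performs - while DecodePUNPCKHMask(4, M)
+// above appends <2, 6, 3, 7>.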
+
+#endif
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 4a10be518f03f..0d02e5ee472bb 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -73,7 +73,7 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
if (GV->hasDefaultVisibility() &&
(isDecl || GV->isWeakForLinker()))
return X86II::MO_GOTPCREL;
- } else {
+ } else if (!isTargetWin64()) {
assert(isTargetELF() && "Unknown rip-relative target");
// Extra load is needed for all externally visible.
@@ -260,9 +260,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
- HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
- HasAVX = ((ECX >> 28) & 0x1);
- HasAES = IsIntel && ((ECX >> 25) & 0x1);
+ HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
+ HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
+ HasAVX = ((ECX >> 28) & 0x1);
+ HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
@@ -291,6 +292,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasSSE4A(false)
, HasAVX(false)
, HasAES(false)
+ , HasCLMUL(false)
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 486dbc4e2e900..0ee91abe21f4e 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -74,6 +74,9 @@ protected:
/// HasAES - Target has AES instructions
bool HasAES;
+ /// HasCLMUL - Target has carry-less multiplication
+ bool HasCLMUL;
+
/// HasFMA3 - Target has 3-operand fused multiply-add
bool HasFMA3;
@@ -149,6 +152,7 @@ public:
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasAVX() const { return HasAVX; }
bool hasAES() const { return HasAES; }
+ bool hasCLMUL() const { return HasCLMUL; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }
@@ -182,6 +186,10 @@ public:
return Is64Bit && (isTargetMingw() || isTargetWindows());
}
+ bool isTargetWin32() const {
+ return !Is64Bit && (isTargetMingw() || isTargetWindows());
+ }
+
std::string getDataLayout() const {
const char *p;
if (is64Bit())
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index df00d3ffcc791..ce8636eb72b54 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -46,8 +46,15 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
bool RelaxAll) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
- default:
+ case Triple::Darwin:
return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ case Triple::MinGW32:
+ case Triple::MinGW64:
+ case Triple::Cygwin:
+ case Triple::Win32:
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+ default:
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
}
}
@@ -105,15 +112,21 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
-
+
// If no relocation model was picked, default as appropriate for the target.
if (getRelocationModel() == Reloc::Default) {
- if (!Subtarget.isTargetDarwin())
- setRelocationModel(Reloc::Static);
- else if (Subtarget.is64Bit())
+ // Darwin defaults to PIC in 64 bit mode and dynamic-no-pic in 32 bit mode.
+ // Win64 requires rip-rel addressing, thus we force it to PIC. Otherwise we
+ // use static relocation model by default.
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.is64Bit())
+ setRelocationModel(Reloc::PIC_);
+ else
+ setRelocationModel(Reloc::DynamicNoPIC);
+ } else if (Subtarget.isTargetWin64())
setRelocationModel(Reloc::PIC_);
else
- setRelocationModel(Reloc::DynamicNoPIC);
+ setRelocationModel(Reloc::Static);
}
assert(getRelocationModel() != Reloc::Default &&
@@ -136,29 +149,27 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget.isTargetDarwin() &&
is64Bit)
setRelocationModel(Reloc::PIC_);
-
+
// Determine the PICStyle based on the target selected.
if (getRelocationModel() == Reloc::Static) {
// Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None.
Subtarget.setPICStyle(PICStyles::None);
+ } else if (Subtarget.is64Bit()) {
+ // PIC in 64 bit mode is always rip-rel.
+ Subtarget.setPICStyle(PICStyles::RIPRel);
} else if (Subtarget.isTargetCygMing()) {
Subtarget.setPICStyle(PICStyles::None);
} else if (Subtarget.isTargetDarwin()) {
- if (Subtarget.is64Bit())
- Subtarget.setPICStyle(PICStyles::RIPRel);
- else if (getRelocationModel() == Reloc::PIC_)
+ if (getRelocationModel() == Reloc::PIC_)
Subtarget.setPICStyle(PICStyles::StubPIC);
else {
assert(getRelocationModel() == Reloc::DynamicNoPIC);
Subtarget.setPICStyle(PICStyles::StubDynamicNoPIC);
}
} else if (Subtarget.isTargetELF()) {
- if (Subtarget.is64Bit())
- Subtarget.setPICStyle(PICStyles::RIPRel);
- else
- Subtarget.setPICStyle(PICStyles::GOT);
+ Subtarget.setPICStyle(PICStyles::GOT);
}
-
+
// Finally, if we have "none" as our PIC style, force to static mode.
if (Subtarget.getPICStyle() == PICStyles::None)
setRelocationModel(Reloc::Static);
@@ -182,9 +193,6 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
- PM.add(createX87FPRegKillInserterPass());
-
PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index 6656bdc10eae4..8f06dd32662f2 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -264,15 +264,13 @@ bool XCoreAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
SmallString<128> Str;
raw_svector_ostream O(Str);
-
+
// Check for mov mnemonic
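+ // (e.g. 'add r0, r1, 0' - an ADD_2rus with a zero immediate - prints as
+ // 'mov r0, r1', the register-to-register move previously detected via
+ // isMoveInstr.)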
- unsigned src, dst, srcSR, dstSR;
- if (TM.getInstrInfo()->isMoveInstr(*MI, src, dst, srcSR, dstSR)) {
- O << "\tmov " << getRegisterName(dst) << ", ";
- O << getRegisterName(src);
- } else {
+ if (MI->getOpcode() == XCore::ADD_2rus && !MI->getOperand(2).getImm())
+ O << "\tmov " << getRegisterName(MI->getOperand(0).getReg()) << ", "
+ << getRegisterName(MI->getOperand(1).getReg());
+ else
printInstruction(MI, O);
- }
OutStreamer.EmitRawText(O.str());
}
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 1b8e7edfc7ca9..38b35d7666c0e 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -10,7 +10,7 @@ tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
tablegen(XCoreGenCallingConv.inc -gen-callingconv)
tablegen(XCoreGenSubtarget.inc -gen-subtarget)
-add_llvm_target(XCore
+add_llvm_target(XCoreCodeGen
XCoreFrameInfo.cpp
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 5564ddf133eaf..755ece7e9abac 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -56,6 +56,17 @@ namespace {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
+ inline bool immMskBitp(SDNode *inN) const {
+ ConstantSDNode *N = cast<ConstantSDNode>(inN);
+ uint32_t value = (uint32_t)N->getZExtValue();
+ if (!isMask_32(value)) {
+ return false;
+ }
+ int msksize = 32 - CountLeadingZeros_32(value);
+ return (msksize >= 1 && msksize <= 8) ||
+ msksize == 16 || msksize == 24 || msksize == 32;
+ }
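+ // A few concrete cases: 0x7f (msksize 7) and 0xffff (msksize 16) are
+ // accepted, while 0xfff (msksize 12) and non-masks like 0x6 are rejected.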
+
// Complex Pattern Selectors.
bool SelectADDRspii(SDNode *Op, SDValue Addr, SDValue &Base,
SDValue &Offset);
@@ -151,17 +162,15 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
case ISD::Constant: {
- if (Predicate_immMskBitp(N)) {
+ uint64_t Val = cast<ConstantSDNode>(N)->getZExtValue();
+ if (immMskBitp(N)) {
// Transformation function: get the size of a mask
- int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue();
- assert(isMask_32(MaskVal));
// Look for the first non-zero bit
- SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal));
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val));
return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
MVT::i32, MskSize);
}
- else if (! Predicate_immU16(N)) {
- unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
+ else if (!isUInt<16>(Val)) {
SDValue CPIdx =
CurDAG->getTargetConstantPool(ConstantInt::get(
Type::getInt32Ty(*CurDAG->getContext()), Val),
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index dd90ea9767705..ad00046af17de 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -46,33 +46,6 @@ static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
-/// Return true if the instruction is a register to register move and
-/// leave the source and dest operands in the passed parameters.
-///
-bool XCoreInstrInfo::isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSR, unsigned &DstSR) const {
- SrcSR = DstSR = 0; // No sub-registers.
-
- // We look for 4 kinds of patterns here:
- // add dst, src, 0
- // sub dst, src, 0
- // or dst, src, src
- // and dst, src, src
- if ((MI.getOpcode() == XCore::ADD_2rus || MI.getOpcode() == XCore::SUB_2rus)
- && isZeroImm(MI.getOperand(2))) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- } else if ((MI.getOpcode() == XCore::OR_3r || MI.getOpcode() == XCore::AND_3r)
- && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
- DstReg = MI.getOperand(0).getReg();
- SrcReg = MI.getOperand(1).getReg();
- return true;
- }
- return false;
-}
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
@@ -437,7 +410,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
it->getFrameIdx(), RC, &RI);
if (emitFrameMoves) {
MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
- BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
+ BuildMI(MBB, MI, DL, get(XCore::PROLOG_LABEL)).addSym(SaveLabel);
XFI->getSpillLabels().push_back(std::make_pair(SaveLabel, *it));
}
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index e5b0171579fce..d2b116eef0d8f 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -30,12 +30,6 @@ public:
///
virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
- /// Return true if the instruction is a register to register move and return
- /// the source and dest operands and their sub-register indices by reference.
- virtual bool isMoveInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 19b9b1f8c00c6..6b3b39ba1d494 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -140,17 +140,7 @@ def immU20 : PatLeaf<(imm), [{
return (uint32_t)N->getZExtValue() < (1 << 20);
}]>;
-def immMskBitp : PatLeaf<(imm), [{
- uint32_t value = (uint32_t)N->getZExtValue();
- if (!isMask_32(value)) {
- return false;
- }
- int msksize = 32 - CountLeadingZeros_32(value);
- return (msksize >= 1 && msksize <= 8)
- || msksize == 16
- || msksize == 24
- || msksize == 32;
-}]>;
+def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
def immBitp : PatLeaf<(imm), [{
uint32_t value = (uint32_t)N->getZExtValue();
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 2a88342180e40..f82e59814e775 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -155,10 +155,9 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-unsigned
+void
XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value,
- RegScavenger *RS) const {
+ int SPAdj, RegScavenger *RS) const {
assert(SPAdj == 0 && "Unexpected");
MachineInstr &MI = *II;
DebugLoc dl = MI.getDebugLoc();
@@ -291,7 +290,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Erase old instruction.
MBB.erase(II);
- return 0;
}
void
@@ -420,7 +418,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
// Show update of SP.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
MachineLocation SPDst(MachineLocation::VirtualFP);
MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize * 4);
@@ -439,7 +437,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
MCSymbol *SaveLRLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveLRLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveLRLabel);
MachineLocation CSDst(MachineLocation::VirtualFP, LRSpillOffset);
MachineLocation CSSrc(XCore::LR);
MMI->getFrameMoves().push_back(MachineMove(SaveLRLabel, CSDst, CSSrc));
@@ -455,7 +453,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
MBB.addLiveIn(XCore::R10);
if (emitFrameMoves) {
MCSymbol *SaveR10Label = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(SaveR10Label);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(SaveR10Label);
MachineLocation CSDst(MachineLocation::VirtualFP, FPSpillOffset);
MachineLocation CSSrc(XCore::R10);
MMI->getFrameMoves().push_back(MachineMove(SaveR10Label, CSDst, CSSrc));
@@ -467,7 +465,7 @@ void XCoreRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (emitFrameMoves) {
// Show FP is now valid.
MCSymbol *FrameLabel = MMI->getContext().CreateTempSymbol();
- BuildMI(MBB, MBBI, dl, TII.get(XCore::DBG_LABEL)).addSym(FrameLabel);
+ BuildMI(MBB, MBBI, dl, TII.get(XCore::PROLOG_LABEL)).addSym(FrameLabel);
MachineLocation SPDst(FramePtr);
MachineLocation SPSrc(MachineLocation::VirtualFP);
MMI->getFrameMoves().push_back(MachineMove(FrameLabel, SPDst, SPSrc));
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 66132ba8ff66f..e636c1c7298aa 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -54,9 +54,8 @@ public:
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const;
- unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, FrameIndexValue *Value = NULL,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, RegScavenger *RS = NULL) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index abfa514e20cfe..838d5505490f5 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -25,7 +25,7 @@ namespace {
// Hello - The first implementation, without getAnalysisUsage.
struct Hello : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- Hello() : FunctionPass(&ID) {}
+ Hello() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
++HelloCounter;
@@ -37,13 +37,13 @@ namespace {
}
char Hello::ID = 0;
-static RegisterPass<Hello> X("hello", "Hello World Pass");
+INITIALIZE_PASS(Hello, "hello", "Hello World Pass", false, false);
namespace {
// Hello2 - The second implementation with getAnalysisUsage implemented.
struct Hello2 : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- Hello2() : FunctionPass(&ID) {}
+ Hello2() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
++HelloCounter;
@@ -60,5 +60,6 @@ namespace {
}
char Hello2::ID = 0;
-static RegisterPass<Hello2>
-Y("hello2", "Hello World Pass (with getAnalysisUsage implemented)");
+INITIALIZE_PASS(Hello2, "hello2",
+ "Hello World Pass (with getAnalysisUsage implemented)",
+ false, false);
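
The Hello changes show the whole migration pattern in miniature: pass IDs are now passed to the base-class constructor as a char& rather than a void*, and static RegisterPass<T> objects give way to the INITIALIZE_PASS macro. A minimal sketch of the resulting idiom, using a hypothetical pass:

#include "llvm/Pass.h"
using namespace llvm;

namespace {
  // Hypothetical pass, shown only to illustrate the post-patch idiom.
  struct CountBlocks : public FunctionPass {
    static char ID;                      // the address of ID identifies the pass
    CountBlocks() : FunctionPass(ID) {}  // ID is now passed by reference
    virtual bool runOnFunction(Function &F) {
      return false;                      // analysis only; F is unchanged
    }
  };
}
char CountBlocks::ID = 0;
INITIALIZE_PASS(CountBlocks, "count-blocks", "Count Basic Blocks",
                false, false);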
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 28ea079abd360..0c77e1fd8cff4 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -67,7 +67,7 @@ namespace {
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(&ID), maxElements(maxElements) {}
+ : CallGraphSCCPass(ID), maxElements(maxElements) {}
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
@@ -84,8 +84,8 @@ namespace {
}
char ArgPromotion::ID = 0;
-static RegisterPass<ArgPromotion>
-X("argpromotion", "Promote 'by reference' arguments to scalars");
+INITIALIZE_PASS(ArgPromotion, "argpromotion",
+ "Promote 'by reference' arguments to scalars", false, false);
Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
return new ArgPromotion(maxElements);
@@ -208,8 +208,8 @@ static bool AllCalleesPassInValidPointerForArgument(Argument *Arg) {
// have direct callees.
for (Value::use_iterator UI = Callee->use_begin(), E = Callee->use_end();
UI != E; ++UI) {
- CallSite CS = CallSite::get(*UI);
- assert(CS.getInstruction() && "Should only have direct calls!");
+ CallSite CS(*UI);
+ assert(CS && "Should only have direct calls!");
if (!IsAlwaysValidPointer(CS.getArgument(ArgNo)))
return false;
@@ -619,14 +619,13 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Get a new callgraph node for NF.
CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
-
// Loop over all of the callers of the function, transforming the call sites
// to pass in the loaded pointers.
//
SmallVector<Value*, 16> Args;
while (!F->use_empty()) {
- CallSite CS = CallSite::get(F->use_back());
+ CallSite CS(F->use_back());
assert(CS.getCalledFunction() == F);
Instruction *Call = CS.getInstruction();
const AttrListPtr &CallPAL = CS.getAttributes();
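
The CallSite::get(V) calls are replaced with direct construction throughout this patch. A CallSite built from a Value that is neither a call nor an invoke is null and converts to false, which is what lets the guards and assertions shrink. A sketch of the idiom (helper name hypothetical):

#include "llvm/Function.h"
#include "llvm/Support/CallSite.h"
using namespace llvm;

// True iff V is a call or invoke whose callee is exactly F.
static bool isDirectCallTo(Value *V, Function *F) {
  CallSite CS(V);  // null CallSite if V is not a call/invoke
  return CS && CS.getCalledFunction() == F;
}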
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 3c05f88027a62..64e8d792dc3ad 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -19,10 +19,12 @@
#define DEBUG_TYPE "constmerge"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -31,7 +33,7 @@ STATISTIC(NumMerged, "Number of global constants merged");
namespace {
struct ConstantMerge : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- ConstantMerge() : ModulePass(&ID) {}
+ ConstantMerge() : ModulePass(ID) {}
// run - For this pass, process all of the globals in the module,
// eliminating duplicate constants.
@@ -41,12 +43,32 @@ namespace {
}
char ConstantMerge::ID = 0;
-static RegisterPass<ConstantMerge>
-X("constmerge", "Merge Duplicate Global Constants");
+INITIALIZE_PASS(ConstantMerge, "constmerge",
+ "Merge Duplicate Global Constants", false, false);
ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
+
+
+/// Find values that are marked as llvm.used.
+static void FindUsedValues(GlobalVariable *LLVMUsed,
+ SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
+ if (LLVMUsed == 0) return;
+ ConstantArray *Inits = dyn_cast<ConstantArray>(LLVMUsed->getInitializer());
+ if (Inits == 0) return;
+
+ for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
+ if (GlobalValue *GV =
+ dyn_cast<GlobalValue>(Inits->getOperand(i)->stripPointerCasts()))
+ UsedValues.insert(GV);
+}
+
bool ConstantMerge::runOnModule(Module &M) {
+ // Find all the globals that are marked "used". These cannot be merged.
+ SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
+ FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
+ FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
+
// Map unique constant/section pairs to globals. We don't want to merge
// globals in different sections.
DenseMap<Constant*, GlobalVariable*> CMap;
@@ -79,9 +101,13 @@ bool ConstantMerge::runOnModule(Module &M) {
// Only process constants with initializers in the default address space.
if (!GV->isConstant() ||!GV->hasDefinitiveInitializer() ||
- GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty())
+ GV->getType()->getAddressSpace() != 0 || !GV->getSection().empty() ||
+ // Don't touch values marked with attribute(used).
+ UsedGlobals.count(GV))
continue;
+
+
Constant *Init = GV->getInitializer();
// Check to see if the initializer is already known.
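
The new UsedGlobals set feeds the candidate filter in the last hunk. Restated as one predicate (helper name hypothetical), a global is now mergeable only when all of the following hold:

#include "llvm/GlobalVariable.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;

// Constant, has a definitive initializer, default address space, no
// custom section, and not named in llvm.used / llvm.compiler.used.
static bool isMergeCandidate(const GlobalVariable *GV,
                             const SmallPtrSet<const GlobalValue*, 8> &Used) {
  return GV->isConstant() && GV->hasDefinitiveInitializer() &&
         GV->getType()->getAddressSpace() == 0 &&
         GV->getSection().empty() && !Used.count(GV);
}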
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 475eee8b19e4b..47df235424e2f 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -122,11 +122,11 @@ namespace {
protected:
// DAH uses this to specify a different ID.
- explicit DAE(void *ID) : ModulePass(ID) {}
+ explicit DAE(char &ID) : ModulePass(ID) {}
public:
static char ID; // Pass identification, replacement for typeid
- DAE() : ModulePass(&ID) {}
+ DAE() : ModulePass(ID) {}
bool runOnModule(Module &M);
@@ -151,8 +151,7 @@ namespace {
char DAE::ID = 0;
-static RegisterPass<DAE>
-X("deadargelim", "Dead Argument Elimination");
+INITIALIZE_PASS(DAE, "deadargelim", "Dead Argument Elimination", false, false);
namespace {
/// DAH - DeadArgumentHacking pass - Same as dead argument elimination, but
@@ -160,15 +159,16 @@ namespace {
/// by bugpoint.
struct DAH : public DAE {
static char ID;
- DAH() : DAE(&ID) {}
+ DAH() : DAE(ID) {}
virtual bool ShouldHackArguments() const { return true; }
};
}
char DAH::ID = 0;
-static RegisterPass<DAH>
-Y("deadarghaX0r", "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)");
+INITIALIZE_PASS(DAH, "deadarghaX0r",
+ "Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)",
+ false, false);
/// createDeadArgEliminationPass - This pass removes arguments from functions
/// which are not used by the body of the function.
@@ -220,11 +220,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
//
std::vector<Value*> Args;
while (!Fn.use_empty()) {
- CallSite CS = CallSite::get(Fn.use_back());
+ CallSite CS(Fn.use_back());
Instruction *Call = CS.getInstruction();
// Pass all the same arguments.
- Args.assign(CS.arg_begin(), CS.arg_begin()+NumArgs);
+ Args.assign(CS.arg_begin(), CS.arg_begin() + NumArgs);
// Drop any attributes that were on the vararg arguments.
AttrListPtr PAL = CS.getAttributes();
@@ -250,8 +250,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
- if (MDNode *N = Call->getDbgMetadata())
- New->setDbgMetadata(N);
+ New->setDebugLoc(Call->getDebugLoc());
Args.clear();
@@ -725,7 +724,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
//
std::vector<Value*> Args;
while (!F->use_empty()) {
- CallSite CS = CallSite::get(F->use_back());
+ CallSite CS(F->use_back());
Instruction *Call = CS.getInstruction();
AttributesVec.clear();
@@ -780,8 +779,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (cast<CallInst>(Call)->isTailCall())
cast<CallInst>(New)->setTailCall();
}
- if (MDNode *N = Call->getDbgMetadata())
- New->setDbgMetadata(N);
+ New->setDebugLoc(Call->getDebugLoc());
Args.clear();
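
Both rewritten call sites above switch from copying raw !dbg metadata to the DebugLoc API; the replacement call inherits the source location in one line. A sketch of the pattern (helper name hypothetical):

#include "llvm/Instructions.h"
#include <vector>
using namespace llvm;

// Clone a call to a new callee, preserving its source location.
static CallInst *cloneCallTo(CallInst *Old, Function *NewCallee,
                             std::vector<Value*> &Args) {
  CallInst *New = CallInst::Create(NewCallee, Args.begin(), Args.end(),
                                   "", Old);  // insert before Old
  New->setDebugLoc(Old->getDebugLoc());       // was: setDbgMetadata(...)
  return New;
}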
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
index 662fbb5cd4130..5dc50c5bef32f 100644
--- a/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ b/lib/Transforms/IPO/DeadTypeElimination.cpp
@@ -26,7 +26,7 @@ STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
namespace {
struct DTE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- DTE() : ModulePass(&ID) {}
+ DTE() : ModulePass(ID) {}
// doPassInitialization - For this pass, it removes global symbol table
// entries for primitive types. These are never used for linking in GCC and
@@ -45,7 +45,7 @@ namespace {
}
char DTE::ID = 0;
-static RegisterPass<DTE> X("deadtypeelim", "Dead Type Elimination");
+INITIALIZE_PASS(DTE, "deadtypeelim", "Dead Type Elimination", false, false);
ModulePass *llvm::createDeadTypeEliminationPass() {
return new DTE();
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 7f67e48ade839..45c5fe76ba7c5 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -17,15 +17,15 @@
#include "llvm/Pass.h"
#include "llvm/Constants.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/ADT/SetVector.h"
#include <algorithm>
using namespace llvm;
namespace {
/// @brief A pass to extract specific functions and their dependencies.
class GVExtractorPass : public ModulePass {
- std::vector<GlobalValue*> Named;
+ SetVector<GlobalValue *> Named;
bool deleteStuff;
- bool reLink;
public:
static char ID; // Pass identification, replacement for typeid
@@ -33,135 +33,42 @@ namespace {
/// specified function. Otherwise, it deletes as much of the module as
/// possible, except for the function specified.
///
- explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true,
- bool relinkCallees = false)
- : ModulePass(&ID), Named(GVs), deleteStuff(deleteS),
- reLink(relinkCallees) {}
+ explicit GVExtractorPass(std::vector<GlobalValue*>& GVs, bool deleteS = true)
+ : ModulePass(ID), Named(GVs.begin(), GVs.end()), deleteStuff(deleteS) {}
bool runOnModule(Module &M) {
- if (Named.size() == 0) {
- return false; // Nothing to extract
- }
-
-
- if (deleteStuff)
- return deleteGV();
- M.setModuleInlineAsm("");
- return isolateGV(M);
- }
-
- bool deleteGV() {
- for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
- GE = Named.end(); GI != GE; ++GI) {
- if (Function* NamedFunc = dyn_cast<Function>(*GI)) {
- // If we're in relinking mode, set linkage of all internal callees to
- // external. This will allow us extract function, and then - link
- // everything together
- if (reLink) {
- for (Function::iterator B = NamedFunc->begin(), BE = NamedFunc->end();
- B != BE; ++B) {
- for (BasicBlock::iterator I = B->begin(), E = B->end();
- I != E; ++I) {
- if (CallInst* callInst = dyn_cast<CallInst>(&*I)) {
- Function* Callee = callInst->getCalledFunction();
- if (Callee && Callee->hasLocalLinkage())
- Callee->setLinkage(GlobalValue::ExternalLinkage);
- }
- }
- }
- }
-
- NamedFunc->setLinkage(GlobalValue::ExternalLinkage);
- NamedFunc->deleteBody();
- assert(NamedFunc->isDeclaration() && "This didn't make the function external!");
- } else {
- if (!(*GI)->isDeclaration()) {
- cast<GlobalVariable>(*GI)->setInitializer(0); //clear the initializer
- (*GI)->setLinkage(GlobalValue::ExternalLinkage);
- }
- }
- }
- return true;
- }
-
- bool isolateGV(Module &M) {
- // Mark all globals internal
- // FIXME: what should we do with private linkage?
- for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I)
+ // Visit the global inline asm.
+ if (!deleteStuff)
+ M.setModuleInlineAsm("");
+
+ // For simplicity, just give all GlobalValues ExternalLinkage. A trickier
+ // implementation could figure out which GlobalValues are actually
+ // referenced by the Named set, and which GlobalValues in the rest of
+ // the module are referenced by the Named set, and get away with leaving
+ // more internal and private things internal and private. But for now,
+ // be conservative and simple.
+
+ // Visit the GlobalVariables.
+ for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+ I != E; ++I)
if (!I->isDeclaration()) {
- I->setLinkage(GlobalValue::InternalLinkage);
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == Named.count(I))
+ I->setInitializer(0);
}
+
+ // Visit the Functions.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
if (!I->isDeclaration()) {
- I->setLinkage(GlobalValue::InternalLinkage);
- }
-
- // Make sure our result is globally accessible...
- // by putting them in the used array
- {
- std::vector<Constant *> AUGs;
- const Type *SBP=
- Type::getInt8PtrTy(M.getContext());
- for (std::vector<GlobalValue*>::iterator GI = Named.begin(),
- GE = Named.end(); GI != GE; ++GI) {
- (*GI)->setLinkage(GlobalValue::ExternalLinkage);
- AUGs.push_back(ConstantExpr::getBitCast(*GI, SBP));
- }
- ArrayType *AT = ArrayType::get(SBP, AUGs.size());
- Constant *Init = ConstantArray::get(AT, AUGs);
- GlobalValue *gv = new GlobalVariable(M, AT, false,
- GlobalValue::AppendingLinkage,
- Init, "llvm.used");
- gv->setSection("llvm.metadata");
- }
-
- // All of the functions may be used by global variables or the named
- // globals. Loop through them and create a new, external functions that
- // can be "used", instead of ones with bodies.
- std::vector<Function*> NewFunctions;
-
- Function *Last = --M.end(); // Figure out where the last real fn is.
-
- for (Module::iterator I = M.begin(); ; ++I) {
- if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
- Function *New = Function::Create(I->getFunctionType(),
- GlobalValue::ExternalLinkage);
- New->copyAttributesFrom(I);
-
- // If it's not the named function, delete the body of the function
- I->dropAllReferences();
-
- M.getFunctionList().push_back(New);
- NewFunctions.push_back(New);
- New->takeName(I);
+ if (I->hasLocalLinkage())
+ I->setVisibility(GlobalValue::HiddenVisibility);
+ I->setLinkage(GlobalValue::ExternalLinkage);
+ if (deleteStuff == Named.count(I))
+ I->deleteBody();
}
- if (&*I == Last) break; // Stop after processing the last function
- }
-
- // Now that we have replacements all set up, loop through the module,
- // deleting the old functions, replacing them with the newly created
- // functions.
- if (!NewFunctions.empty()) {
- unsigned FuncNum = 0;
- Module::iterator I = M.begin();
- do {
- if (std::find(Named.begin(), Named.end(), &*I) == Named.end()) {
- // Make everything that uses the old function use the new dummy fn
- I->replaceAllUsesWith(NewFunctions[FuncNum++]);
-
- Function *Old = I;
- ++I; // Move the iterator to the new function
-
- // Delete the old function!
- M.getFunctionList().erase(Old);
-
- } else {
- ++I; // Skip the function we are extracting
- }
- } while (&*I != NewFunctions[0]);
- }
-
return true;
}
};
@@ -170,6 +77,6 @@ namespace {
}
ModulePass *llvm::createGVExtractionPass(std::vector<GlobalValue*>& GVs,
- bool deleteFn, bool relinkCallees) {
- return new GVExtractorPass(GVs, deleteFn, relinkCallees);
+ bool deleteFn) {
+ return new GVExtractorPass(GVs, deleteFn);
}
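
The condition deleteStuff == Named.count(I) in both rewritten loops does double duty: in delete mode (deleteStuff true) it strips exactly the named values, and in extraction mode it strips exactly the unnamed ones. As a truth table (helper hypothetical):

// deleteStuff | isNamed | strip body/initializer?
//    true     |  true   |  yes  (deleting the named values)
//    true     |  false  |  no
//    false    |  true   |  no   (keeping what we extract)
//    false    |  false  |  yes  (dropping the rest of the module)
static bool shouldStrip(bool deleteStuff, bool isNamed) {
  return deleteStuff == isNamed;
}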
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 9bd7af61c531f..6165ba023f737 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -41,7 +41,7 @@ STATISTIC(NumNoAlias, "Number of function returns marked noalias");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(&ID) {}
+ FunctionAttrs() : CallGraphSCCPass(ID) {}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -69,8 +69,8 @@ namespace {
}
char FunctionAttrs::ID = 0;
-static RegisterPass<FunctionAttrs>
-X("functionattrs", "Deduce function attributes");
+INITIALIZE_PASS(FunctionAttrs, "functionattrs",
+ "Deduce function attributes", false, false);
Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
@@ -162,14 +162,14 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
// Some instructions can be ignored even if they read or write memory.
// Detect these now, skipping to the next instruction if one is found.
- CallSite CS = CallSite::get(I);
- if (CS.getInstruction() && CS.getCalledFunction()) {
+ CallSite CS(cast<Value>(I));
+ if (CS && CS.getCalledFunction()) {
// Ignore calls to functions in the same SCC.
if (SCCNodes.count(CS.getCalledFunction()))
continue;
// Ignore intrinsics that only access local memory.
if (unsigned id = CS.getCalledFunction()->getIntrinsicID())
- if (AliasAnalysis::getModRefBehavior(id) ==
+ if (AliasAnalysis::getIntrinsicModRefBehavior(id) ==
AliasAnalysis::AccessesArguments) {
// Check that all pointer arguments point to local memory.
for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
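
Here getModRefBehavior(id) is renamed to getIntrinsicModRefBehavior(id); the logic is unchanged. An intrinsic classified as AccessesArguments can still be skipped when deducing readonly/readnone, provided every pointer argument points at function-local memory, which is what the truncated loop above checks. A condensed sketch of the classification test (helper name hypothetical):

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/Support/CallSite.h"
using namespace llvm;

// True iff CS calls an intrinsic that only touches its arguments.
static bool onlyAccessesArguments(CallSite CS) {
  unsigned IID = CS.getCalledFunction()->getIntrinsicID();
  return IID && AliasAnalysis::getIntrinsicModRefBehavior(IID) ==
                    AliasAnalysis::AccessesArguments;
}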
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 44216a6df99c4..aa18601b9aeca 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -31,7 +31,7 @@ STATISTIC(NumVariables, "Number of global variables removed");
namespace {
struct GlobalDCE : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- GlobalDCE() : ModulePass(&ID) {}
+ GlobalDCE() : ModulePass(ID) {}
// run - Do the GlobalDCE pass on the specified module, optionally updating
// the specified callgraph to reflect the changes.
@@ -51,7 +51,8 @@ namespace {
}
char GlobalDCE::ID = 0;
-static RegisterPass<GlobalDCE> X("globaldce", "Dead Global Elimination");
+INITIALIZE_PASS(GlobalDCE, "globaldce",
+ "Dead Global Elimination", false, false);
ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 735a1c47c39b5..a77af549caa13 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -59,7 +59,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
}
static char ID; // Pass identification, replacement for typeid
- GlobalOpt() : ModulePass(&ID) {}
+ GlobalOpt() : ModulePass(ID) {}
bool runOnModule(Module &M);
@@ -74,7 +74,8 @@ namespace {
}
char GlobalOpt::ID = 0;
-static RegisterPass<GlobalOpt> X("globalopt", "Global Variable Optimizer");
+INITIALIZE_PASS(GlobalOpt, "globalopt",
+ "Global Variable Optimizer", false, false);
ModulePass *llvm::createGlobalOptimizerPass() { return new GlobalOpt(); }
@@ -1467,7 +1468,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
TargetData *TD) {
if (!TD)
return false;
-
+
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -2077,7 +2078,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {
return false;
// The first index must be zero.
- ConstantInt *CI = dyn_cast<ConstantInt>(*next(CE->op_begin()));
+ ConstantInt *CI = dyn_cast<ConstantInt>(*llvm::next(CE->op_begin()));
if (!CI || !CI->isZero()) return false;
// The remaining indices must be compile-time known integers within the
@@ -2302,7 +2303,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (isa<InlineAsm>(CI->getCalledValue())) return false;
// Resolve function pointers.
- Function *Callee = dyn_cast<Function>(getVal(Values, CI->getCalledValue()));
+ Function *Callee = dyn_cast<Function>(getVal(Values,
+ CI->getCalledValue()));
if (!Callee) return false; // Cannot resolve.
SmallVector<Constant*, 8> Formals;
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index e4db235b1d108..1b3cf7810cc68 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -35,7 +35,7 @@ namespace {
///
struct IPCP : public ModulePass {
static char ID; // Pass identification, replacement for typeid
- IPCP() : ModulePass(&ID) {}
+ IPCP() : ModulePass(ID) {}
bool runOnModule(Module &M);
private:
@@ -45,8 +45,8 @@ namespace {
}
char IPCP::ID = 0;
-static RegisterPass<IPCP>
-X("ipconstprop", "Interprocedural constant propagation");
+INITIALIZE_PASS(IPCP, "ipconstprop",
+ "Interprocedural constant propagation", false, false);
ModulePass *llvm::createIPConstantPropagationPass() { return new IPCP(); }
@@ -94,7 +94,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
return false;
- CallSite CS = CallSite::get(cast<Instruction>(U));
+ CallSite CS(cast<Instruction>(U));
if (!CS.isCallee(UI))
return false;
@@ -219,7 +219,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
// constant.
bool MadeChange = false;
for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
- CallSite CS = CallSite::get(*UI);
+ CallSite CS(*UI);
Instruction* Call = CS.getInstruction();
// Not a call instruction or a call instruction that's not calling F
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 8e312e7d91855..ecc60ad069325 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -36,7 +36,7 @@ namespace {
InlineCostAnalyzer CA;
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(&ID, -2000000000) {}
+ AlwaysInliner() : Inliner(ID, -2000000000) {}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -61,8 +61,8 @@ namespace {
}
char AlwaysInliner::ID = 0;
-static RegisterPass<AlwaysInliner>
-X("always-inline", "Inliner for always_inline functions");
+INITIALIZE_PASS(AlwaysInliner, "always-inline",
+ "Inliner for always_inline functions", false, false);
Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 74b4a1c10ece2..9c6637dfe5ad6 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -33,8 +33,8 @@ namespace {
SmallPtrSet<const Function*, 16> NeverInline;
InlineCostAnalyzer CA;
public:
- SimpleInliner() : Inliner(&ID) {}
- SimpleInliner(int Threshold) : Inliner(&ID, Threshold) {}
+ SimpleInliner() : Inliner(ID) {}
+ SimpleInliner(int Threshold) : Inliner(ID, Threshold) {}
static char ID; // Pass identification, replacement for typeid
InlineCost getInlineCost(CallSite CS) {
return CA.getInlineCost(CS, NeverInline);
@@ -56,8 +56,8 @@ namespace {
}
char SimpleInliner::ID = 0;
-static RegisterPass<SimpleInliner>
-X("inline", "Function Integration/Inlining");
+INITIALIZE_PASS(SimpleInliner, "inline",
+ "Function Integration/Inlining", false, false);
Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 9bb01f5699fe3..4983e8e13a3ee 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -48,10 +48,10 @@ HintThreshold("inlinehint-threshold", cl::Hidden, cl::init(325),
// Threshold to use when optsize is specified (and there is no -inline-limit).
const int OptSizeThreshold = 75;
-Inliner::Inliner(void *ID)
+Inliner::Inliner(char &ID)
: CallGraphSCCPass(ID), InlineThreshold(InlineLimit) {}
-Inliner::Inliner(void *ID, int Threshold)
+Inliner::Inliner(char &ID, int Threshold)
: CallGraphSCCPass(ID), InlineThreshold(Threshold) {}
/// getAnalysisUsage - For this class, we declare that we require and preserve
@@ -238,11 +238,11 @@ bool Inliner::shouldInline(CallSite CS) {
bool someOuterCallWouldNotBeInlined = false;
for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end();
I != E; ++I) {
- CallSite CS2 = CallSite::get(*I);
+ CallSite CS2(*I);
// If this isn't a call to Caller (it could be some other sort
// of reference) skip it.
- if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
+ if (!CS2 || CS2.getCalledFunction() != Caller)
continue;
InlineCost IC2 = getInlineCost(CS2);
@@ -334,10 +334,10 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- CallSite CS = CallSite::get(I);
+ CallSite CS(cast<Value>(I));
// If this isn't a call, or it is a call to an intrinsic, it can
// never be inlined.
- if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
+ if (!CS || isa<IntrinsicInst>(I))
continue;
// If this is a direct call to an external function, we can never inline
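
runOnSCC now constructs a CallSite directly from each instruction; combined with the null test this gives a compact filter for inlinable sites. The loop shape, with the inliner's bookkeeping elided (helper name hypothetical):

#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Support/CallSite.h"
#include <vector>
using namespace llvm;

// Collect the call sites an inliner would consider within F.
static void collectCandidateCalls(Function &F, std::vector<CallSite> &Out) {
  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
    for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) {
      CallSite CS(cast<Value>(I));       // null unless I is a call/invoke
      if (!CS || isa<IntrinsicInst>(I))  // intrinsics are never inlined
        continue;
      Out.push_back(CS);
    }
}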
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index 47abb7dfd8121..a1d919fd8a042 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -63,11 +63,11 @@ namespace {
} // end anonymous namespace
char InternalizePass::ID = 0;
-static RegisterPass<InternalizePass>
-X("internalize", "Internalize Global Symbols");
+INITIALIZE_PASS(InternalizePass, "internalize",
+ "Internalize Global Symbols", false, false);
InternalizePass::InternalizePass(bool AllButMain)
- : ModulePass(&ID), AllButMain(AllButMain){
+ : ModulePass(ID), AllButMain(AllButMain){
if (!APIFile.empty()) // If a filename is specified, use it.
LoadFile(APIFile.c_str());
if (!APIList.empty()) // If a list is specified, use it as well.
@@ -75,7 +75,7 @@ InternalizePass::InternalizePass(bool AllButMain)
}
InternalizePass::InternalizePass(const std::vector<const char *>&exportList)
- : ModulePass(&ID), AllButMain(false){
+ : ModulePass(ID), AllButMain(false){
for(std::vector<const char *>::const_iterator itr = exportList.begin();
itr != exportList.end(); itr++) {
ExternalNames.insert(*itr);
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index cb813303facb9..f88dff67d7c98 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -37,7 +37,7 @@ namespace {
unsigned NumLoops;
explicit LoopExtractor(unsigned numLoops = ~0)
- : LoopPass(&ID), NumLoops(numLoops) {}
+ : LoopPass(ID), NumLoops(numLoops) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -50,8 +50,8 @@ namespace {
}
char LoopExtractor::ID = 0;
-static RegisterPass<LoopExtractor>
-X("loop-extract", "Extract loops into new functions");
+INITIALIZE_PASS(LoopExtractor, "loop-extract",
+ "Extract loops into new functions", false, false);
namespace {
/// SingleLoopExtractor - For bugpoint.
@@ -62,8 +62,8 @@ namespace {
} // End anonymous namespace
char SingleLoopExtractor::ID = 0;
-static RegisterPass<SingleLoopExtractor>
-Y("loop-extract-single", "Extract at most one loop into a new function");
+INITIALIZE_PASS(SingleLoopExtractor, "loop-extract-single",
+ "Extract at most one loop into a new function", false, false);
// createLoopExtractorPass - This pass extracts all natural loops from the
// program into a function if it can.
@@ -147,27 +147,26 @@ namespace {
std::vector<std::pair<std::string, std::string> > BlocksToNotExtractByName;
public:
static char ID; // Pass identification, replacement for typeid
- explicit BlockExtractorPass(const std::vector<BasicBlock*> &B)
- : ModulePass(&ID), BlocksToNotExtract(B) {
+ BlockExtractorPass() : ModulePass(ID) {
if (!BlockFile.empty())
LoadFile(BlockFile.c_str());
}
- BlockExtractorPass() : ModulePass(&ID) {}
bool runOnModule(Module &M);
};
}
char BlockExtractorPass::ID = 0;
-static RegisterPass<BlockExtractorPass>
-XX("extract-blocks", "Extract Basic Blocks From Module (for bugpoint use)");
+INITIALIZE_PASS(BlockExtractorPass, "extract-blocks",
+ "Extract Basic Blocks From Module (for bugpoint use)",
+ false, false);
// createBlockExtractorPass - This pass extracts all blocks (except those
// specified in the argument list) from the functions in the module.
//
-ModulePass *llvm::createBlockExtractorPass(const std::vector<BasicBlock*> &BTNE)
+ModulePass *llvm::createBlockExtractorPass()
{
- return new BlockExtractorPass(BTNE);
+ return new BlockExtractorPass();
}
void BlockExtractorPass::LoadFile(const char *Filename) {
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index 76cfef8335c98..6c715de04b763 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -109,7 +109,7 @@ namespace {
bool IsTransformableFunction(StringRef Name);
public:
static char ID; // Pass identification, replacement for typeid
- LowerSetJmp() : ModulePass(&ID) {}
+ LowerSetJmp() : ModulePass(ID) {}
void visitCallInst(CallInst& CI);
void visitInvokeInst(InvokeInst& II);
@@ -122,7 +122,7 @@ namespace {
} // end anonymous namespace
char LowerSetJmp::ID = 0;
-static RegisterPass<LowerSetJmp> X("lowersetjmp", "Lower Set Jump");
+INITIALIZE_PASS(LowerSetJmp, "lowersetjmp", "Lower Set Jump", false, false);
// run - Run the transformation on the program. We grab the function
// prototypes for longjmp and setjmp. If they are used in the program,
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index aeeafe7fd19dc..5d838f98aa082 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -29,44 +29,27 @@
//
// Many functions have their address taken by the virtual function table for
// the object they belong to. However, as long as it's only used for a lookup
-// and call, this is irrelevant, and we'd like to fold such implementations.
+// and call, this is irrelevant, and we'd like to fold such functions.
//
-// * use SCC to cut down on pair-wise comparisons and solve larger cycles.
+// * switch from n^2 pair-wise comparisons to an n-way comparison for each
+// bucket.
//
-// The current implementation loops over a pair-wise comparison of all
-// functions in the program where the two functions in the pair are treated as
-// assumed to be equal until proven otherwise. We could both use fewer
-// comparisons and optimize more complex cases if we used strongly connected
-// components of the call graph.
-//
-// * be smarter about bitcast.
+// * be smarter about bitcasts.
//
// In order to fold functions, we will sometimes add either bitcast instructions
// or bitcast constant expressions. Unfortunately, this can confound further
// analysis since the two functions differ where one has a bitcast and the
-// other doesn't. We should learn to peer through bitcasts without imposing bad
-// performance properties.
-//
-// * don't emit aliases for Mach-O.
-//
-// Mach-O doesn't support aliases which means that we must avoid introducing
-// them in the bitcode on architectures which don't support them, such as
-// Mac OSX. There's a few approaches to this problem;
-// a) teach codegen to lower global aliases to thunks on platforms which don't
-// support them.
-// b) always emit thunks, and create a separate thunk-to-alias pass which
-// runs on ELF systems. This has the added benefit of transforming other
-// thunks such as those produced by a C++ frontend into aliases when legal
-// to do so.
+// other doesn't. We should learn to look through bitcasts.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mergefunc"
#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Constants.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
@@ -76,68 +59,103 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
-#include <map>
#include <vector>
using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
namespace {
+ /// MergeFunctions finds functions which will generate identical machine code,
+ /// by considering all pointer types to be equivalent. Once identified,
+ /// MergeFunctions will fold them by replacing a call to one with a call to a
+ /// bitcast of the other.
+ ///
class MergeFunctions : public ModulePass {
public:
- static char ID; // Pass identification, replacement for typeid
- MergeFunctions() : ModulePass(&ID) {}
+ static char ID;
+ MergeFunctions() : ModulePass(ID) {}
bool runOnModule(Module &M);
private:
- bool isEquivalentGEP(const GetElementPtrInst *GEP1,
- const GetElementPtrInst *GEP2);
-
- bool equals(const BasicBlock *BB1, const BasicBlock *BB2);
- bool equals(const Function *F, const Function *G);
+ /// MergeTwoFunctions - Merge two equivalent functions. Upon completion, G
+ /// may be deleted, or may be converted into a thunk. In either case, it
+ /// should never be visited again.
+ void MergeTwoFunctions(Function *F, Function *G) const;
- bool compare(const Value *V1, const Value *V2);
+ /// WriteThunk - Replace G with a simple tail call to bitcast(F). Also
+ /// replace direct uses of G with bitcast(F).
+ void WriteThunk(Function *F, Function *G) const;
- const Function *LHS, *RHS;
- typedef DenseMap<const Value *, unsigned long> IDMap;
- IDMap Map;
- DenseMap<const Function *, IDMap> Domains;
- DenseMap<const Function *, unsigned long> DomainCount;
TargetData *TD;
};
}
char MergeFunctions::ID = 0;
-static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions");
+INITIALIZE_PASS(MergeFunctions, "mergefunc", "Merge Functions", false, false);
ModulePass *llvm::createMergeFunctionsPass() {
return new MergeFunctions();
}
-// ===----------------------------------------------------------------------===
-// Comparison of functions
-// ===----------------------------------------------------------------------===
+namespace {
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. TargetData is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+ FunctionComparator(const TargetData *TD, const Function *F1,
+ const Function *F2)
+ : F1(F1), F2(F2), TD(TD), IDMap1Count(0), IDMap2Count(0) {}
+
+ /// Compare - test whether the two functions have equivalent behaviour.
+ bool Compare();
+
+private:
+ /// Compare - test whether two basic blocks have equivalent behaviour.
+ bool Compare(const BasicBlock *BB1, const BasicBlock *BB2);
+
+ /// Enumerate - Assign or look up previously assigned numbers for the two
+ /// values, and return whether the numbers are equal. Numbers are assigned in
+ /// the order visited.
+ bool Enumerate(const Value *V1, const Value *V2);
+
+ /// isEquivalentOperation - Compare two Instructions for equivalence, similar
+ /// to Instruction::isSameOperationAs but with modifications to the type
+ /// comparison.
+ bool isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const;
+
+ /// isEquivalentGEP - Compare two GEPs for equivalent pointer arithmetic.
+ bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+ bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+ const GetElementPtrInst *GEP2) {
+ return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
+ }
-static unsigned long hash(const Function *F) {
- const FunctionType *FTy = F->getFunctionType();
+ /// isEquivalentType - Compare two Types, treating all pointer types as equal.
+ bool isEquivalentType(const Type *Ty1, const Type *Ty2) const;
- FoldingSetNodeID ID;
- ID.AddInteger(F->size());
- ID.AddInteger(F->getCallingConv());
- ID.AddBoolean(F->hasGC());
- ID.AddBoolean(FTy->isVarArg());
- ID.AddInteger(FTy->getReturnType()->getTypeID());
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- ID.AddInteger(FTy->getParamType(i)->getTypeID());
- return ID.ComputeHash();
+ // The two functions undergoing comparison.
+ const Function *F1, *F2;
+
+ const TargetData *TD;
+
+ typedef DenseMap<const Value *, unsigned long> IDMap;
+ IDMap Map1, Map2;
+ unsigned long IDMap1Count, IDMap2Count;
+};
}
-/// isEquivalentType - any two pointers are equivalent. Otherwise, standard
-/// type equivalence rules apply.
-static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
+/// isEquivalentType - any two pointers in the same address space are
+/// equivalent. Otherwise, standard type equivalence rules apply.
+bool FunctionComparator::isEquivalentType(const Type *Ty1,
+ const Type *Ty2) const {
if (Ty1 == Ty2)
return true;
if (Ty1->getTypeID() != Ty2->getTypeID())
@@ -184,21 +202,6 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
return true;
}
- case Type::UnionTyID: {
- const UnionType *UTy1 = cast<UnionType>(Ty1);
- const UnionType *UTy2 = cast<UnionType>(Ty2);
-
- // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc.
- if (UTy1->getNumElements() != UTy2->getNumElements())
- return false;
-
- for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) {
- if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i)))
- return false;
- }
- return true;
- }
-
case Type::FunctionTyID: {
const FunctionType *FTy1 = cast<FunctionType>(Ty1);
const FunctionType *FTy2 = cast<FunctionType>(Ty2);
@@ -216,11 +219,18 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
return true;
}
- case Type::ArrayTyID:
+ case Type::ArrayTyID: {
+ const ArrayType *ATy1 = cast<ArrayType>(Ty1);
+ const ArrayType *ATy2 = cast<ArrayType>(Ty2);
+ return ATy1->getNumElements() == ATy2->getNumElements() &&
+ isEquivalentType(ATy1->getElementType(), ATy2->getElementType());
+ }
+
case Type::VectorTyID: {
- const SequentialType *STy1 = cast<SequentialType>(Ty1);
- const SequentialType *STy2 = cast<SequentialType>(Ty2);
- return isEquivalentType(STy1->getElementType(), STy2->getElementType());
+ const VectorType *VTy1 = cast<VectorType>(Ty1);
+ const VectorType *VTy2 = cast<VectorType>(Ty2);
+ return VTy1->getNumElements() == VTy2->getNumElements() &&
+ isEquivalentType(VTy1->getElementType(), VTy2->getElementType());
}
}
}
@@ -228,8 +238,8 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
/// isEquivalentOperation - determine whether the two operations are the same
/// except that pointer-to-A and pointer-to-B are equivalent. This should be
/// kept in sync with Instruction::isSameOperationAs.
-static bool
-isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
+bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
+ const Instruction *I2) const {
if (I1->getOpcode() != I2->getOpcode() ||
I1->getNumOperands() != I2->getNumOperands() ||
!isEquivalentType(I1->getType(), I2->getType()) ||
@@ -281,18 +291,15 @@ isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
return true;
}
-bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
- const GetElementPtrInst *GEP2) {
+/// isEquivalentGEP - determine whether two GEP operations perform the same
+/// underlying arithmetic.
+bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
+ const GEPOperator *GEP2) {
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
- SmallVector<Value *, 8> Indices1, Indices2;
- for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(),
- E = GEP1->idx_end(); I != E; ++I) {
- Indices1.push_back(*I);
- }
- for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(),
- E = GEP2->idx_end(); I != E; ++I) {
- Indices2.push_back(*I);
- }
+ SmallVector<Value *, 8> Indices1(GEP1->idx_begin(), GEP1->idx_end());
+ SmallVector<Value *, 8> Indices2(GEP2->idx_begin(), GEP2->idx_end());
uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
Indices1.data(), Indices1.size());
uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
@@ -300,7 +307,6 @@ bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
return Offset1 == Offset2;
}
- // Equivalent types aren't enough.
if (GEP1->getPointerOperand()->getType() !=
GEP2->getPointerOperand()->getType())
return false;
@@ -309,19 +315,26 @@ bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
return false;
for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
- if (!compare(GEP1->getOperand(i), GEP2->getOperand(i)))
+ if (!Enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
return false;
}
return true;
}
-bool MergeFunctions::compare(const Value *V1, const Value *V2) {
- if (V1 == LHS || V1 == RHS)
- if (V2 == LHS || V2 == RHS)
- return true;
+/// Enumerate - Compare two values used by the two functions under pair-wise
+/// comparison. If this is the first time the values are seen, they're added to
+/// the mapping so that we will detect mismatches on next use.
+bool FunctionComparator::Enumerate(const Value *V1, const Value *V2) {
+ // Check for function @f1 referring to itself and function @f2 referring to
+ // itself, or referring to each other, or both referring to either of them.
+ // They're all equivalent if the two functions are otherwise equivalent.
+ if (V1 == F1 && V2 == F2)
+ return true;
+ if (V1 == F2 && V2 == F1)
+ return true;
- // TODO: constant expressions in terms of LHS and RHS
+ // TODO: constant expressions with GEP or references to F1 or F2.
if (isa<Constant>(V1))
return V1 == V2;
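
Enumerate replaces the old per-domain ID maps with two counters, one per function: each value receives a sequence number the first time it is seen, and two values correspond only if their numbers agree. A toy model of the invariant, outside LLVM's types:

#include <map>

// Values correspond iff they were first encountered at the same point
// in their respective functions' traversals.
struct Enumerator {
  std::map<const void*, unsigned long> Map1, Map2;
  unsigned long Count1, Count2;
  Enumerator() : Count1(0), Count2(0) {}

  bool enumerate(const void *V1, const void *V2) {
    unsigned long &ID1 = Map1[V1];
    if (!ID1) ID1 = ++Count1;  // first sight: assign the next number
    unsigned long &ID2 = Map2[V2];
    if (!ID2) ID2 = ++Count2;
    return ID1 == ID2;         // a mismatch means the functions differ
  }
};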
@@ -332,228 +345,138 @@ bool MergeFunctions::compare(const Value *V1, const Value *V2) {
IA1->getConstraintString() == IA2->getConstraintString();
}
- // We enumerate constants globally and arguments, basic blocks or
- // instructions within the function they belong to.
- const Function *Domain1 = NULL;
- if (const Argument *A = dyn_cast<Argument>(V1)) {
- Domain1 = A->getParent();
- } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) {
- Domain1 = BB->getParent();
- } else if (const Instruction *I = dyn_cast<Instruction>(V1)) {
- Domain1 = I->getParent()->getParent();
- }
-
- const Function *Domain2 = NULL;
- if (const Argument *A = dyn_cast<Argument>(V2)) {
- Domain2 = A->getParent();
- } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) {
- Domain2 = BB->getParent();
- } else if (const Instruction *I = dyn_cast<Instruction>(V2)) {
- Domain2 = I->getParent()->getParent();
- }
-
- if (Domain1 != Domain2)
- if (Domain1 != LHS && Domain1 != RHS)
- if (Domain2 != LHS && Domain2 != RHS)
- return false;
-
- IDMap &Map1 = Domains[Domain1];
unsigned long &ID1 = Map1[V1];
if (!ID1)
- ID1 = ++DomainCount[Domain1];
+ ID1 = ++IDMap1Count;
- IDMap &Map2 = Domains[Domain2];
unsigned long &ID2 = Map2[V2];
if (!ID2)
- ID2 = ++DomainCount[Domain2];
+ ID2 = ++IDMap2Count;
return ID1 == ID2;
}
-bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) {
- BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
- BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
+/// Compare - test whether two basic blocks have equivalent behaviour.
+bool FunctionComparator::Compare(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
+ BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end();
do {
- if (!compare(FI, GI))
+ if (!Enumerate(F1I, F2I))
return false;
- if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) {
- const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI);
- const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI);
+ if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) {
+ const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I);
+ if (!GEP2)
+ return false;
- if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
+ if (!Enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
return false;
if (!isEquivalentGEP(GEP1, GEP2))
return false;
} else {
- if (!isEquivalentOperation(FI, GI))
+ if (!isEquivalentOperation(F1I, F2I))
return false;
- for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
- Value *OpF = FI->getOperand(i);
- Value *OpG = GI->getOperand(i);
+ assert(F1I->getNumOperands() == F2I->getNumOperands());
+ for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) {
+ Value *OpF1 = F1I->getOperand(i);
+ Value *OpF2 = F2I->getOperand(i);
- if (!compare(OpF, OpG))
+ if (!Enumerate(OpF1, OpF2))
return false;
- if (OpF->getValueID() != OpG->getValueID() ||
- !isEquivalentType(OpF->getType(), OpG->getType()))
+ if (OpF1->getValueID() != OpF2->getValueID() ||
+ !isEquivalentType(OpF1->getType(), OpF2->getType()))
return false;
}
}
- ++FI, ++GI;
- } while (FI != FE && GI != GE);
+ ++F1I, ++F2I;
+ } while (F1I != F1E && F2I != F2E);
- return FI == FE && GI == GE;
+ return F1I == F1E && F2I == F2E;
}
-bool MergeFunctions::equals(const Function *F, const Function *G) {
+/// Compare - test whether the two functions have equivalent behaviour.
+bool FunctionComparator::Compare() {
// We need to recheck everything, but check the things that weren't included
// in the hash first.
- if (F->getAttributes() != G->getAttributes())
+ if (F1->getAttributes() != F2->getAttributes())
return false;
- if (F->hasGC() != G->hasGC())
+ if (F1->hasGC() != F2->hasGC())
return false;
- if (F->hasGC() && F->getGC() != G->getGC())
+ if (F1->hasGC() && F1->getGC() != F2->getGC())
return false;
- if (F->hasSection() != G->hasSection())
+ if (F1->hasSection() != F2->hasSection())
return false;
- if (F->hasSection() && F->getSection() != G->getSection())
+ if (F1->hasSection() && F1->getSection() != F2->getSection())
return false;
- if (F->isVarArg() != G->isVarArg())
+ if (F1->isVarArg() != F2->isVarArg())
return false;
// TODO: if it's internal and only used in direct calls, we could handle this
// case too.
- if (F->getCallingConv() != G->getCallingConv())
+ if (F1->getCallingConv() != F2->getCallingConv())
return false;
- if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
+ if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType()))
return false;
- assert(F->arg_size() == G->arg_size() &&
+ assert(F1->arg_size() == F2->arg_size() &&
"Identical functions have a different number of args.");
- LHS = F;
- RHS = G;
-
// Visit the arguments so that they get enumerated in the order they're
// passed in.
- for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
- fe = F->arg_end(); fi != fe; ++fi, ++gi) {
- if (!compare(fi, gi))
+ for (Function::const_arg_iterator f1i = F1->arg_begin(),
+ f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) {
+ if (!Enumerate(f1i, f2i))
llvm_unreachable("Arguments repeat");
}
- SmallVector<const BasicBlock *, 8> FBBs, GBBs;
- SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F.
- FBBs.push_back(&F->getEntryBlock());
- GBBs.push_back(&G->getEntryBlock());
- VisitedBBs.insert(FBBs[0]);
- while (!FBBs.empty()) {
- const BasicBlock *FBB = FBBs.pop_back_val();
- const BasicBlock *GBB = GBBs.pop_back_val();
- if (!compare(FBB, GBB) || !equals(FBB, GBB)) {
- Domains.clear();
- DomainCount.clear();
- return false;
- }
- const TerminatorInst *FTI = FBB->getTerminator();
- const TerminatorInst *GTI = GBB->getTerminator();
- assert(FTI->getNumSuccessors() == GTI->getNumSuccessors());
- for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(FTI->getSuccessor(i)))
- continue;
- FBBs.push_back(FTI->getSuccessor(i));
- GBBs.push_back(GTI->getSuccessor(i));
- }
- }
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> F1BBs, F2BBs;
+ SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1.
- Domains.clear();
- DomainCount.clear();
- return true;
-}
+ F1BBs.push_back(&F1->getEntryBlock());
+ F2BBs.push_back(&F2->getEntryBlock());
-// ===----------------------------------------------------------------------===
-// Folding of functions
-// ===----------------------------------------------------------------------===
-
-// Cases:
-// * F is external strong, G is external strong:
-// turn G into a thunk to F (1)
-// * F is external strong, G is external weak:
-// turn G into a thunk to F (1)
-// * F is external weak, G is external weak:
-// unfoldable
-// * F is external strong, G is internal:
-// address of G taken:
-// turn G into a thunk to F (1)
-// address of G not taken:
-// make G an alias to F (2)
-// * F is internal, G is external weak
-// address of F is taken:
-// turn G into a thunk to F (1)
-// address of F is not taken:
-// make G an alias of F (2)
-// * F is internal, G is internal:
-// address of F and G are taken:
-// turn G into a thunk to F (1)
-// address of G is not taken:
-// make G an alias to F (2)
-//
-// alias requires linkage == (external,local,weak) fallback to creating a thunk
-// external means 'externally visible' linkage != (internal,private)
-// internal means linkage == (internal,private)
-// weak means linkage mayBeOverridable
-// being external implies that the address is taken
-//
-// 1. turn G into a thunk to F
-// 2. make G an alias to F
+ VisitedBBs.insert(F1BBs[0]);
+ while (!F1BBs.empty()) {
+ const BasicBlock *F1BB = F1BBs.pop_back_val();
+ const BasicBlock *F2BB = F2BBs.pop_back_val();
-enum LinkageCategory {
- ExternalStrong,
- ExternalWeak,
- Internal
-};
+ if (!Enumerate(F1BB, F2BB) || !Compare(F1BB, F2BB))
+ return false;
-static LinkageCategory categorize(const Function *F) {
- switch (F->getLinkage()) {
- case GlobalValue::InternalLinkage:
- case GlobalValue::PrivateLinkage:
- case GlobalValue::LinkerPrivateLinkage:
- return Internal;
-
- case GlobalValue::WeakAnyLinkage:
- case GlobalValue::WeakODRLinkage:
- case GlobalValue::ExternalWeakLinkage:
- case GlobalValue::LinkerPrivateWeakLinkage:
- return ExternalWeak;
-
- case GlobalValue::ExternalLinkage:
- case GlobalValue::AvailableExternallyLinkage:
- case GlobalValue::LinkOnceAnyLinkage:
- case GlobalValue::LinkOnceODRLinkage:
- case GlobalValue::AppendingLinkage:
- case GlobalValue::DLLImportLinkage:
- case GlobalValue::DLLExportLinkage:
- case GlobalValue::CommonLinkage:
- return ExternalStrong;
- }
+ const TerminatorInst *F1TI = F1BB->getTerminator();
+ const TerminatorInst *F2TI = F2BB->getTerminator();
- llvm_unreachable("Unknown LinkageType.");
- return ExternalWeak;
+ assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors());
+ for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(F1TI->getSuccessor(i)))
+ continue;
+
+ F1BBs.push_back(F1TI->getSuccessor(i));
+ F2BBs.push_back(F2TI->getSuccessor(i));
+ }
+ }
+ return true;
}
-static void ThunkGToF(Function *F, Function *G) {
+/// WriteThunk - Replace G with a simple tail call to bitcast(F). Also replace
+/// direct uses of G with bitcast(F).
+void MergeFunctions::WriteThunk(Function *F, Function *G) const {
if (!G->mayBeOverridden()) {
// Redirect direct callers of G to F.
Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
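
The paired walk in Compare() above keeps two worklists in lock step, so the i-th block popped from one always corresponds to the i-th block popped from the other, and blocks unreachable from the entry are never visited. A toy model over a hypothetical Block interface (succ_size/succ are assumptions, not LLVM API):

#include <set>
#include <vector>

// Lock-step CFG traversal: push matching successor pairs, skip blocks
// already visited (tracked in terms of the first function).
template <typename Block>
bool parallelWalk(Block *Entry1, Block *Entry2,
                  bool (*compareBlocks)(Block *, Block *)) {
  std::vector<Block*> W1(1, Entry1), W2(1, Entry2);
  std::set<Block*> Visited;
  Visited.insert(Entry1);
  while (!W1.empty()) {
    Block *B1 = W1.back(); W1.pop_back();
    Block *B2 = W2.back(); W2.pop_back();
    if (!compareBlocks(B1, B2))
      return false;
    for (unsigned i = 0, e = B1->succ_size(); i != e; ++i) {
      if (!Visited.insert(B1->succ(i)).second)
        continue;
      W1.push_back(B1->succ(i));
      W2.push_back(B2->succ(i));
    }
  }
  return true;
}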
@@ -567,33 +490,34 @@ static void ThunkGToF(Function *F, Function *G) {
}
}
+ // If G was internal then we may have replaced all uses of G with F. If so,
+ // stop here and delete G. There's no need for a thunk.
+ if (G->hasLocalLinkage() && G->use_empty()) {
+ G->eraseFromParent();
+ return;
+ }
+
Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
G->getParent());
BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
+ IRBuilder<false> Builder(BB);
SmallVector<Value *, 16> Args;
unsigned i = 0;
const FunctionType *FFTy = F->getFunctionType();
for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
AI != AE; ++AI) {
- if (FFTy->getParamType(i) == AI->getType()) {
- Args.push_back(AI);
- } else {
- Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB));
- }
+ Args.push_back(Builder.CreateBitCast(AI, FFTy->getParamType(i)));
++i;
}
- CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB);
+ CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
if (NewG->getReturnType()->isVoidTy()) {
- ReturnInst::Create(F->getContext(), BB);
- } else if (CI->getType() != NewG->getReturnType()) {
- Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB);
- ReturnInst::Create(F->getContext(), BCI, BB);
+ Builder.CreateRetVoid();
} else {
- ReturnInst::Create(F->getContext(), CI, BB);
+ Builder.CreateRet(Builder.CreateBitCast(CI, NewG->getReturnType()));
}
NewG->copyAttributesFrom(G);
@@ -602,152 +526,126 @@ static void ThunkGToF(Function *F, Function *G) {
G->eraseFromParent();
}
-static void AliasGToF(Function *F, Function *G) {
- // Darwin will trigger llvm_unreachable if asked to codegen an alias.
- return ThunkGToF(F, G);
-
-#if 0
- if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
- return ThunkGToF(F, G);
-
- GlobalAlias *GA = new GlobalAlias(
- G->getType(), G->getLinkage(), "",
- ConstantExpr::getBitCast(F, G->getType()), G->getParent());
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
- GA->takeName(G);
- GA->setVisibility(G->getVisibility());
- G->replaceAllUsesWith(GA);
- G->eraseFromParent();
-#endif
-}
-
-static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
- Function *F = FnVec[i];
- Function *G = FnVec[j];
-
- LinkageCategory catF = categorize(F);
- LinkageCategory catG = categorize(G);
-
- if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) {
- std::swap(FnVec[i], FnVec[j]);
- std::swap(F, G);
- std::swap(catF, catG);
- }
-
- switch (catF) {
- case ExternalStrong:
- switch (catG) {
- case ExternalStrong:
- case ExternalWeak:
- ThunkGToF(F, G);
- break;
- case Internal:
- if (G->hasAddressTaken())
- ThunkGToF(F, G);
- else
- AliasGToF(F, G);
- break;
- }
- break;
-
- case ExternalWeak: {
- assert(catG == ExternalWeak);
+/// MergeTwoFunctions - Merge two equivalent functions. Upon completion,
+/// Function G is deleted.
+void MergeFunctions::MergeTwoFunctions(Function *F, Function *G) const {
+ if (F->isWeakForLinker()) {
+ assert(G->isWeakForLinker());
// Make them both thunks to the same internal function.
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
F->getParent());
H->copyAttributesFrom(F);
H->takeName(F);
F->replaceAllUsesWith(H);
- ThunkGToF(F, G);
- ThunkGToF(F, H);
+ unsigned MaxAlignment = std::max(G->getAlignment(), H->getAlignment());
- F->setLinkage(GlobalValue::InternalLinkage);
- } break;
-
- case Internal:
- switch (catG) {
- case ExternalStrong:
- llvm_unreachable(0);
- // fall-through
- case ExternalWeak:
- if (F->hasAddressTaken())
- ThunkGToF(F, G);
- else
- AliasGToF(F, G);
- break;
- case Internal: {
- bool addrTakenF = F->hasAddressTaken();
- bool addrTakenG = G->hasAddressTaken();
- if (!addrTakenF && addrTakenG) {
- std::swap(FnVec[i], FnVec[j]);
- std::swap(F, G);
- std::swap(addrTakenF, addrTakenG);
- }
+ WriteThunk(F, G);
+ WriteThunk(F, H);
- if (addrTakenF && addrTakenG) {
- ThunkGToF(F, G);
- } else {
- assert(!addrTakenG);
- AliasGToF(F, G);
- }
- } break;
- } break;
+ F->setAlignment(MaxAlignment);
+ F->setLinkage(GlobalValue::InternalLinkage);
+ } else {
+ WriteThunk(F, G);
}
++NumFunctionsMerged;
- return true;
}
-// ===----------------------------------------------------------------------===
-// Pass definition
-// ===----------------------------------------------------------------------===
+static unsigned ProfileFunction(const Function *F) {
+ const FunctionType *FTy = F->getFunctionType();
-bool MergeFunctions::runOnModule(Module &M) {
- bool Changed = false;
+ FoldingSetNodeID ID;
+ ID.AddInteger(F->size());
+ ID.AddInteger(F->getCallingConv());
+ ID.AddBoolean(F->hasGC());
+ ID.AddBoolean(FTy->isVarArg());
+ ID.AddInteger(FTy->getReturnType()->getTypeID());
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+ ID.AddInteger(FTy->getParamType(i)->getTypeID());
+ return ID.ComputeHash();
+}
- std::map<unsigned long, std::vector<Function *> > FnMap;
+class ComparableFunction {
+public:
+ ComparableFunction(Function *Func, TargetData *TD)
+ : Func(Func), Hash(ProfileFunction(Func)), TD(TD) {}
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration())
- continue;
+ AssertingVH<Function> const Func;
+ const unsigned Hash;
+ TargetData * const TD;
+};
- FnMap[hash(F)].push_back(F);
+struct MergeFunctionsEqualityInfo {
+ static ComparableFunction *getEmptyKey() {
+ return reinterpret_cast<ComparableFunction*>(0);
+ }
+ static ComparableFunction *getTombstoneKey() {
+ return reinterpret_cast<ComparableFunction*>(-1);
}
+ static unsigned getHashValue(const ComparableFunction *CF) {
+ return CF->Hash;
+ }
+ static bool isEqual(const ComparableFunction *LHS,
+ const ComparableFunction *RHS) {
+ if (LHS == RHS)
+ return true;
+ if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
+ RHS == getEmptyKey() || RHS == getTombstoneKey())
+ return false;
+ assert(LHS->TD == RHS->TD && "Comparing functions for different targets");
+ return FunctionComparator(LHS->TD, LHS->Func, RHS->Func).Compare();
+ }
+};
+bool MergeFunctions::runOnModule(Module &M) {
+ typedef DenseSet<ComparableFunction *, MergeFunctionsEqualityInfo> FnSetType;
+
+ bool Changed = false;
TD = getAnalysisIfAvailable<TargetData>();
+ std::vector<Function *> Funcs;
+ for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+ if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage())
+ Funcs.push_back(F);
+ }
+
bool LocalChanged;
do {
LocalChanged = false;
- DEBUG(dbgs() << "size: " << FnMap.size() << "\n");
- for (std::map<unsigned long, std::vector<Function *> >::iterator
- I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
- std::vector<Function *> &FnVec = I->second;
- DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
-
- for (int i = 0, e = FnVec.size(); i != e; ++i) {
- for (int j = i + 1; j != e; ++j) {
- bool isEqual = equals(FnVec[i], FnVec[j]);
-
- DEBUG(dbgs() << " " << FnVec[i]->getName()
- << (isEqual ? " == " : " != ")
- << FnVec[j]->getName() << "\n");
-
- if (isEqual) {
- if (fold(FnVec, i, j)) {
- LocalChanged = true;
- FnVec.erase(FnVec.begin() + j);
- --j, --e;
- }
- }
- }
- }
+ FnSetType FnSet;
+ for (unsigned i = 0, e = Funcs.size(); i != e;) {
+ Function *F = Funcs[i];
+ ComparableFunction *NewF = new ComparableFunction(F, TD);
+ std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF);
+ if (!Result.second) {
+ ComparableFunction *&OldF = *Result.first;
+ assert(OldF && "Expected a hash collision");
+
+ // NewF will be deleted in favour of OldF unless NewF is strong and
+ // OldF is weak, in which case they are swapped to keep the strong
+ // definition.
+
+ if (OldF->Func->isWeakForLinker() && !NewF->Func->isWeakForLinker())
+ std::swap(OldF, NewF);
+
+ DEBUG(dbgs() << " " << OldF->Func->getName() << " == "
+ << NewF->Func->getName() << '\n');
+
+ Funcs.erase(Funcs.begin() + i);
+ --e;
+
+ Function *DeleteF = NewF->Func;
+ delete NewF;
+ MergeTwoFunctions(OldF->Func, DeleteF);
+ LocalChanged = true;
+ Changed = true;
+ } else {
+ ++i;
+ }
}
- Changed |= LocalChanged;
+ DeleteContainerPointers(FnSet);
} while (LocalChanged);
return Changed;
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 6b9814ceb8769..432f7c53a67d1 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -30,7 +30,7 @@ namespace {
struct PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
static char ID; // Pass identification, replacement for typeid
- PartialInliner() : ModulePass(&ID) {}
+ PartialInliner() : ModulePass(ID) {}
bool runOnModule(Module& M);
@@ -40,7 +40,8 @@ namespace {
}
char PartialInliner::ID = 0;
-static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner");
+INITIALIZE_PASS(PartialInliner, "partial-inliner",
+ "Partial Inliner", false, false);
ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); }
@@ -67,7 +68,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
// Clone the function, so that we can hack away on it.
ValueMap<const Value*, Value*> VMap;
- Function* duplicateFunction = CloneFunction(F, VMap);
+ Function* duplicateFunction = CloneFunction(F, VMap,
+ /*ModuleLevelChanges=*/false);
duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(duplicateFunction);
BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
@@ -159,7 +161,7 @@ bool PartialInliner::runOnModule(Module& M) {
bool recursive = false;
for (Function::use_iterator UI = currFunc->use_begin(),
UE = currFunc->use_end(); UI != UE; ++UI)
- if (Instruction* I = dyn_cast<Instruction>(UI))
+ if (Instruction* I = dyn_cast<Instruction>(*UI))
if (I->getParent()->getParent() == currFunc) {
recursive = true;
break;
diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp
index 58e14481b0edc..4a99a411ab338 100644
--- a/lib/Transforms/IPO/PartialSpecialization.cpp
+++ b/lib/Transforms/IPO/PartialSpecialization.cpp
@@ -50,14 +50,14 @@ namespace {
int scanDistribution(Function&, int, std::map<Constant*, int>&);
public :
static char ID; // Pass identification, replacement for typeid
- PartSpec() : ModulePass(&ID) {}
+ PartSpec() : ModulePass(ID) {}
bool runOnModule(Module &M);
};
}
char PartSpec::ID = 0;
-static RegisterPass<PartSpec>
-X("partialspecialization", "Partial Specialization");
+INITIALIZE_PASS(PartSpec, "partialspecialization",
+ "Partial Specialization", false, false);
// Specialize F by replacing the arguments (keys) in replacements with the
// constants (values). Replace all calls to F with those constants with
@@ -74,7 +74,8 @@ SpecializeFunction(Function* F,
deleted[arg->getArgNo()] = arg;
}
- Function* NF = CloneFunction(F, replacements);
+ Function* NF = CloneFunction(F, replacements,
+ /*ModuleLevelChanges=*/false);
NF->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(NF);
@@ -82,10 +83,10 @@ SpecializeFunction(Function* F,
ii != ee; ) {
Value::use_iterator i = ii;
++ii;
- if (isa<CallInst>(i) || isa<InvokeInst>(i)) {
- CallSite CS(cast<Instruction>(i));
+ User *U = *i;
+ CallSite CS(U);
+ if (CS) {
if (CS.getCalledFunction() == F) {
-
SmallVector<Value*, 6> args;
// Assemble the non-specialized arguments for the updated callsite.
// In the process, make sure that the specialized arguments are
@@ -105,13 +106,13 @@ SpecializeFunction(Function* F,
}
}
Value* NCall;
- if (CallInst *CI = dyn_cast<CallInst>(i)) {
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
NCall = CallInst::Create(NF, args.begin(), args.end(),
CI->getName(), CI);
cast<CallInst>(NCall)->setTailCall(CI->isTailCall());
cast<CallInst>(NCall)->setCallingConv(CI->getCallingConv());
} else {
- InvokeInst *II = cast<InvokeInst>(i);
+ InvokeInst *II = cast<InvokeInst>(U);
NCall = InvokeInst::Create(NF, II->getNormalDest(),
II->getUnwindDest(),
args.begin(), args.end(),
@@ -123,8 +124,7 @@ SpecializeFunction(Function* F,
++numReplaced;
}
}
- next_use:
- ;
+ next_use:;
}
return NF;
}
@@ -174,14 +174,14 @@ void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
ui != ue; ++ui) {
bool interesting = false;
-
- if (isa<CmpInst>(ui)) interesting = true;
- else if (isa<CallInst>(ui))
+ User *U = *ui;
+ if (isa<CmpInst>(U)) interesting = true;
+ else if (isa<CallInst>(U))
interesting = ui->getOperand(0) == ii;
- else if (isa<InvokeInst>(ui))
+ else if (isa<InvokeInst>(U))
interesting = ui->getOperand(0) == ii;
- else if (isa<SwitchInst>(ui)) interesting = true;
- else if (isa<BranchInst>(ui)) interesting = true;
+ else if (isa<SwitchInst>(U)) interesting = true;
+ else if (isa<BranchInst>(U)) interesting = true;
if (interesting) {
args.push_back(std::distance(F.arg_begin(), ii));
@@ -196,14 +196,16 @@ int PartSpec::scanDistribution(Function& F, int arg,
std::map<Constant*, int>& dist) {
bool hasIndirect = false;
int total = 0;
- for(Value::use_iterator ii = F.use_begin(), ee = F.use_end();
- ii != ee; ++ii)
- if ((isa<CallInst>(ii) || isa<InvokeInst>(ii))
- && ii->getOperand(0) == &F) {
- ++dist[dyn_cast<Constant>(ii->getOperand(arg + 1))];
+ for (Value::use_iterator ii = F.use_begin(), ee = F.use_end();
+ ii != ee; ++ii) {
+ User *U = *ii;
+ CallSite CS(U);
+ if (CS && CS.getCalledFunction() == &F) {
+ ++dist[dyn_cast<Constant>(CS.getArgument(arg))];
++total;
} else
hasIndirect = true;
+ }
// Preserve the original address taken function even if all other uses
// will be specialized.
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index de6099cc1daa0..09ac76f979649 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -37,7 +37,7 @@ STATISTIC(NumUnreach, "Number of noreturn calls optimized");
namespace {
struct PruneEH : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- PruneEH() : CallGraphSCCPass(&ID) {}
+ PruneEH() : CallGraphSCCPass(ID) {}
// runOnSCC - Analyze the SCC, performing the transformation if possible.
bool runOnSCC(CallGraphSCC &SCC);
@@ -48,8 +48,8 @@ namespace {
}
char PruneEH::ID = 0;
-static RegisterPass<PruneEH>
-X("prune-eh", "Remove unused exception handling info");
+INITIALIZE_PASS(PruneEH, "prune-eh",
+ "Remove unused exception handling info", false, false);
Pass *llvm::createPruneEHPass() { return new PruneEH(); }
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 4566a7634af5a..ee10ad0b8ba26 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -29,15 +29,15 @@ namespace {
class StripDeadPrototypesPass : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
- StripDeadPrototypesPass() : ModulePass(&ID) { }
+ StripDeadPrototypesPass() : ModulePass(ID) { }
virtual bool runOnModule(Module &M);
};
} // end anonymous namespace
char StripDeadPrototypesPass::ID = 0;
-static RegisterPass<StripDeadPrototypesPass>
-X("strip-dead-prototypes", "Strip Unused Function Prototypes");
+INITIALIZE_PASS(StripDeadPrototypesPass, "strip-dead-prototypes",
+ "Strip Unused Function Prototypes", false, false);
bool StripDeadPrototypesPass::runOnModule(Module &M) {
bool MadeChange = false;
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 12e8db8b4a546..20b7b8f2b8509 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -39,7 +39,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripSymbols(bool ODI = false)
- : ModulePass(&ID), OnlyDebugInfo(ODI) {}
+ : ModulePass(ID), OnlyDebugInfo(ODI) {}
virtual bool runOnModule(Module &M);
@@ -52,7 +52,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripNonDebugSymbols()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {}
virtual bool runOnModule(Module &M);
@@ -65,7 +65,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDebugDeclare()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {}
virtual bool runOnModule(Module &M);
@@ -78,7 +78,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit StripDeadDebugInfo()
- : ModulePass(&ID) {}
+ : ModulePass(ID) {}
virtual bool runOnModule(Module &M);
@@ -89,32 +89,33 @@ namespace {
}
char StripSymbols::ID = 0;
-static RegisterPass<StripSymbols>
-X("strip", "Strip all symbols from a module");
+INITIALIZE_PASS(StripSymbols, "strip",
+ "Strip all symbols from a module", false, false);
ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
return new StripSymbols(OnlyDebugInfo);
}
char StripNonDebugSymbols::ID = 0;
-static RegisterPass<StripNonDebugSymbols>
-Y("strip-nondebug", "Strip all symbols, except dbg symbols, from a module");
+INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
+ "Strip all symbols, except dbg symbols, from a module",
+ false, false);
ModulePass *llvm::createStripNonDebugSymbolsPass() {
return new StripNonDebugSymbols();
}
char StripDebugDeclare::ID = 0;
-static RegisterPass<StripDebugDeclare>
-Z("strip-debug-declare", "Strip all llvm.dbg.declare intrinsics");
+INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
+ "Strip all llvm.dbg.declare intrinsics", false, false);
ModulePass *llvm::createStripDebugDeclarePass() {
return new StripDebugDeclare();
}
char StripDeadDebugInfo::ID = 0;
-static RegisterPass<StripDeadDebugInfo>
-A("strip-dead-debug-info", "Strip debug info for unused symbols");
+INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
+ "Strip debug info for unused symbols", false, false);
ModulePass *llvm::createStripDeadDebugInfoPass() {
return new StripDeadDebugInfo();
@@ -254,14 +255,15 @@ static bool StripDebugInfo(Module &M) {
}
}
- unsigned MDDbgKind = M.getMDKindID("dbg");
for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
++FI)
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
++BI) {
- Changed = true; // FIXME: Only set if there was debug metadata.
- BI->setMetadata(MDDbgKind, 0);
+ if (!BI->getDebugLoc().isUnknown()) {
+ Changed = true;
+ BI->setDebugLoc(DebugLoc());
+ }
}
return Changed;
@@ -348,8 +350,8 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
E = MDs.end(); I != E; ++I) {
- if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(),
- true)) {
+ GlobalVariable *GV = DIGlobalVariable(*I).getGlobal();
+ if (GV && M.getGlobalVariable(GV->getName(), true)) {
if (!NMD)
NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
NMD->addOperand(*I);
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index a74686f408b67..b82b03f7d9e7d 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -1,4 +1,4 @@
-//===-- StructRetPromotion.cpp - Promote sret arguments ------------------===//
+//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -50,20 +50,19 @@ namespace {
virtual bool runOnSCC(CallGraphSCC &SCC);
static char ID; // Pass identification, replacement for typeid
- SRETPromotion() : CallGraphSCCPass(&ID) {}
+ SRETPromotion() : CallGraphSCCPass(ID) {}
private:
CallGraphNode *PromoteReturn(CallGraphNode *CGN);
bool isSafeToUpdateAllCallers(Function *F);
Function *cloneFunctionBody(Function *F, const StructType *STy);
CallGraphNode *updateCallSites(Function *F, Function *NF);
- bool nestedStructType(const StructType *STy);
};
}
char SRETPromotion::ID = 0;
-static RegisterPass<SRETPromotion>
-X("sretpromotion", "Promote sret arguments to multiple ret values");
+INITIALIZE_PASS(SRETPromotion, "sretpromotion",
+ "Promote sret arguments to multiple ret values", false, false);
Pass *llvm::createStructRetPromotionPass() {
return new SRETPromotion();
@@ -156,7 +155,7 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
FnUseI != FnUseE; ++FnUseI) {
// The function is passed in as an argument to (possibly) another function,
// we can't change it!
- CallSite CS = CallSite::get(*FnUseI);
+ CallSite CS(*FnUseI);
Instruction *Call = CS.getInstruction();
// The function is used by something else than a call or invoke instruction,
// we can't change it!
@@ -187,7 +186,7 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
return false;
for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end();
GEPI != GEPE; ++GEPI)
- if (!isa<LoadInst>(GEPI))
+ if (!isa<LoadInst>(*GEPI))
return false;
}
// Any other FirstArg users make this function unsuitable for sret
@@ -271,7 +270,7 @@ CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF);
while (!F->use_empty()) {
- CallSite CS = CallSite::get(*F->use_begin());
+ CallSite CS(*F->use_begin());
Instruction *Call = CS.getInstruction();
const AttrListPtr &PAL = F->getAttributes();
@@ -351,14 +350,3 @@ CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) {
return NF_CGN;
}
-/// nestedStructType - Return true if STy includes any
-/// other aggregate types
-bool SRETPromotion::nestedStructType(const StructType *STy) {
- unsigned Num = STy->getNumElements();
- for (unsigned i = 0; i < Num; i++) {
- const Type *Ty = STy->getElementType(i);
- if (!Ty->isSingleValueType() && !Ty->isVoidTy())
- return true;
- }
- return false;
-}
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 24e052881a9d4..6f9609cf997ba 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -81,7 +81,7 @@ public:
BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(&ID), TD(0), Builder(0) {}
+ InstCombiner() : FunctionPass(ID), TD(0), Builder(0) {}
public:
virtual bool runOnFunction(Function &F);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 5876f408343b6..19a05bfe9bba3 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -474,19 +474,16 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
// (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) -->
- // (icmp eq (A & (C1|C2)), (C1|C2))
+ // (icmp eq (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero powers of two
if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
- Instruction *I1 = dyn_cast<Instruction>(Val);
- Instruction *I2 = dyn_cast<Instruction>(Val2);
- if (I1 && I1->getOpcode() == Instruction::And &&
- I2 && I2->getOpcode() == Instruction::And &&
- I1->getOperand(0) == I1->getOperand(0)) {
- ConstantInt *CI1 = dyn_cast<ConstantInt>(I1->getOperand(1));
- ConstantInt *CI2 = dyn_cast<ConstantInt>(I2->getOperand(1));
- if (CI1 && !CI1->isZero() && CI2 && !CI2->isZero() &&
- CI1->getValue().operator&(CI2->getValue()) == 0) {
+ Value *Op1 = 0, *Op2 = 0;
+ ConstantInt *CI1 = 0, *CI2 = 0;
+ if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
+ match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
+ if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
+ CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
- Value *NewAnd = Builder->CreateAnd(I1->getOperand(0), ConstOr);
+ Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr);
}
}
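
The power-of-two restriction above matters: (icmp ne (A & C1), 0) pins down
exactly one bit only when C1 has a single bit set. A standalone check of the
fold, exhaustive over one byte, with C1 = 1 and C2 = 2 as illustrative
constants:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A = 0; A < 256; ++A) {
        bool lhs = (A & 1) != 0 && (A & 2) != 0;
        bool rhs = (A & (1 | 2)) == (1 | 2);   // (A & 3) == 3
        assert(lhs == rhs);
      }
      return 0;
    }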
@@ -1170,11 +1167,28 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
if (LHSCst == 0 || RHSCst == 0) return 0;
- // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
- if (LHSCst == RHSCst && LHSCC == RHSCC &&
- LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
- Value *NewOr = Builder->CreateOr(Val, Val2);
- return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ if (LHSCst == RHSCst && LHSCC == RHSCC) {
+ // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Value *NewOr = Builder->CreateOr(Val, Val2);
+ return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
+ }
+
+ // (icmp eq (A & C1), 0) | (icmp eq (A & C2), 0) -->
+ // (icmp ne (A & (C1|C2)), (C1|C2)) where C1 and C2 are non-zero powers of two
+ if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) {
+ Value *Op1 = 0, *Op2 = 0;
+ ConstantInt *CI1 = 0, *CI2 = 0;
+ if (match(LHS->getOperand(0), m_And(m_Value(Op1), m_ConstantInt(CI1))) &&
+ match(RHS->getOperand(0), m_And(m_Value(Op2), m_ConstantInt(CI2)))) {
+ if (Op1 == Op2 && !CI1->isZero() && !CI2->isZero() &&
+ CI1->getValue().isPowerOf2() && CI2->getValue().isPowerOf2()) {
+ Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
+ Value *NewAnd = Builder->CreateAnd(Op1, ConstOr);
+ return Builder->CreateICmp(ICmpInst::ICMP_NE, NewAnd, ConstOr);
+ }
+ }
+ }
}
// From here on, we only handle:
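
The new eq/or case is the De Morgan dual of the ne/and fold earlier in this
file; the same exhaustive check, adapted (constants again illustrative):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A = 0; A < 256; ++A) {
        bool lhs = (A & 1) == 0 || (A & 2) == 0;
        bool rhs = (A & 3) != 3;
        assert(lhs == rhs);
      }
      return 0;
    }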
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 85251a83d4eac..0ebe3b45589e5 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -96,14 +96,23 @@ static unsigned EnforceKnownAlignment(Value *V,
/// increase the alignment of the ultimate object, making this check succeed.
unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
unsigned PrefAlign) {
- unsigned BitWidth = TD ? TD->getTypeSizeInBits(V->getType()) :
- sizeof(PrefAlign) * CHAR_BIT;
+ assert(V->getType()->isPointerTy() &&
+ "GetOrEnforceKnownAlignment expects a pointer!");
+ unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64;
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
ComputeMaskedBits(V, Mask, KnownZero, KnownOne);
unsigned TrailZ = KnownZero.countTrailingOnes();
+
+ // Avoid trouble with ridiculously large TrailZ values, such as
+ // those computed from a null pointer.
+ TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1));
+
unsigned Align = 1u << std::min(BitWidth - 1, TrailZ);
+ // LLVM doesn't support alignments larger than this currently.
+ Align = std::min(Align, +Value::MaximumAlignment);
+
if (PrefAlign > Align)
Align = EnforceKnownAlignment(V, Align, PrefAlign);
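
The alignment arithmetic above can be sanity-checked in isolation. This
sketch reproduces the clamp-then-shift sequence, leaving out the separate
Value::MaximumAlignment cap and assuming 8-bit chars:

    #include <algorithm>
    #include <cassert>

    // TrailZ known-zero low bits imply (1 << TrailZ)-byte alignment; the clamp
    // keeps a null pointer (all 64 bits known zero) from overflowing the shift.
    static unsigned alignFromTrailZ(unsigned TrailZ, unsigned BitWidth) {
      TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * 8 - 1));
      return 1u << std::min(BitWidth - 1, TrailZ);
    }

    int main() {
      assert(alignFromTrailZ(4, 64) == 16);        // 16-byte aligned pointer
      assert(alignFromTrailZ(64, 64) == 1u << 31); // null-pointer case, clamped
      return 0;
    }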
@@ -529,7 +538,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// X + 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
- UndefValue::get(II->getCalledValue()->getType()),
+ UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
@@ -630,8 +639,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
APInt DemandedElts(VWidth, 1);
APInt UndefElts(VWidth, 0);
- if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
- UndefElts)) {
+ if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0),
+ DemandedElts, UndefElts)) {
II->setArgOperand(0, V);
return II;
}
@@ -655,8 +664,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (AllEltsOk) {
// Cast the input vectors to byte vectors.
- Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType());
- Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType());
+ Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0),
+ Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1),
+ Mask->getType());
Value *Result = UndefValue::get(Op0->getType());
// Only extract each element once.
@@ -772,13 +783,15 @@ protected:
NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
}
bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) {
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp))) {
if (SizeCI->isAllOnesValue())
return true;
if (isString)
return SizeCI->getZExtValue() >=
- GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset));
- if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)))
+ GetStringLength(CI->getArgOperand(SizeArgOp));
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(
+ CI->getArgOperand(SizeArgOp)))
return SizeCI->getZExtValue() >= Arg->getZExtValue();
}
return false;
@@ -1140,7 +1153,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
IntrinsicInst *Tramp =
cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
- Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
+ Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 505a0bf8f4e7f..79a9b09c64d07 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -396,6 +396,11 @@ static bool CanEvaluateTruncated(Value *V, const Type *Ty) {
case Instruction::Trunc:
// trunc(trunc(x)) -> trunc(x)
return true;
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
+ // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
+ return true;
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
return CanEvaluateTruncated(SI->getTrueValue(), Ty) &&
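
The new ZExt/SExt case has a direct C analogue: truncating an extended value
yields the same bits as extending or truncating the original directly,
whichever way the sizes fall. One instance, exhaustive over i8, with sizes
chosen purely for illustration:

    #include <cassert>
    #include <cstdint>

    // trunc-to-i16 of (zext i8 -> i64) equals a direct zext i8 -> i16.
    int main() {
      for (uint32_t v = 0; v < 256; ++v) {
        uint8_t x = uint8_t(v);
        assert(uint16_t(uint64_t(x)) == uint16_t(x));
      }
      return 0;
    }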
@@ -454,6 +459,29 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
+
+ // Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
+ Value *A = 0; ConstantInt *Cst = 0;
+ if (match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst))) &&
+ Src->hasOneUse()) {
+ // We have three types to worry about here: the type of A, the source of
+ // the truncate (MidSize), and the destination of the truncate. We know that
+ // ASize < MidSize and MidSize > ResultSize, but don't know the relation
+ // between ASize and ResultSize.
+ unsigned ASize = A->getType()->getPrimitiveSizeInBits();
+
+ // If the shift amount is larger than the size of A, then the result is
+ // known to be zero because all the input bits got shifted out.
+ if (Cst->getZExtValue() >= ASize)
+ return ReplaceInstUsesWith(CI, Constant::getNullValue(CI.getType()));
+
+ // Since we're doing an lshr and a zero extend, and know that the shift
+ // amount is smaller than ASize, it is always safe to do the shift in A's
+ // type, then zero extend or truncate to the result.
+ Value *Shift = Builder->CreateLShr(A, Cst->getZExtValue());
+ Shift->takeName(Src);
+ return CastInst::CreateIntegerCast(Shift, CI.getType(), false);
+ }
return 0;
}
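
A worked instance of the trunc(lshr(zext)) fold, with A of type i8,
MidSize = 32, ResultSize = 16, and a shift of 4 (smaller than ASize, so the
result is not simply zero):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t v = 0; v < 256; ++v) {
        uint8_t A = uint8_t(v);
        uint16_t before = uint16_t(uint32_t(A) >> 4); // trunc(lshr(zext A, 4))
        uint16_t after  = uint16_t(uint8_t(A >> 4));  // zext(lshr(A, 4))
        assert(before == after);
      }
      return 0;
    }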
@@ -538,8 +566,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
if (CI.getType() == In->getType())
return ReplaceInstUsesWith(CI, In);
- else
- return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+ return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
}
}
}
@@ -1097,6 +1124,38 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
break;
}
}
+
+ // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x)
+ // NOTE: This should be disabled by -fno-builtin-sqrt if we ever support it.
+ CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
+ if (Call && Call->getCalledFunction() &&
+ Call->getCalledFunction()->getName() == "sqrt" &&
+ Call->getNumArgOperands() == 1) {
+ CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
+ if (Arg && Arg->getOpcode() == Instruction::FPExt &&
+ CI.getType()->isFloatTy() &&
+ Call->getType()->isDoubleTy() &&
+ Arg->getType()->isDoubleTy() &&
+ Arg->getOperand(0)->getType()->isFloatTy()) {
+ Function *Callee = Call->getCalledFunction();
+ Module *M = CI.getParent()->getParent()->getParent();
+ Constant *SqrtfFunc = M->getOrInsertFunction("sqrtf",
+ Callee->getAttributes(),
+ Builder->getFloatTy(),
+ Builder->getFloatTy(),
+ NULL);
+ CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0),
+ "sqrtfcall");
+ ret->setAttributes(Callee->getAttributes());
+
+
+ // Remove the old Call. With -fmath-errno, it won't get marked readnone.
+ Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+ EraseInstFromFunction(*Call);
+ return ret;
+ }
+ }
+
return 0;
}
@@ -1308,6 +1367,199 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
return new ShuffleVectorInst(InVal, V2, Mask);
}
+static bool isMultipleOfTypeSize(unsigned Value, const Type *Ty) {
+ return Value % Ty->getPrimitiveSizeInBits() == 0;
+}
+
+static unsigned getTypeSizeIndex(unsigned Value, const Type *Ty) {
+ return Value / Ty->getPrimitiveSizeInBits();
+}
+
+/// CollectInsertionElements - V is a value which is inserted into a vector of
+/// VecEltTy. Look through the value to see if we can decompose it into
+/// insertions into the vector. See the example in the comment for
+/// OptimizeIntegerToVectorInsertions for the pattern this handles.
+/// The type of V is always a non-zero multiple of VecEltTy's size.
+///
+/// This returns false if the pattern can't be matched or true if it can,
+/// filling in Elements with the elements found here.
+static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+ SmallVectorImpl<Value*> &Elements,
+ const Type *VecEltTy) {
+ // Undef values never contribute useful bits to the result.
+ if (isa<UndefValue>(V)) return true;
+
+ // If we got down to a value of the right type, we win; try inserting it
+ // into the right element.
+ if (V->getType() == VecEltTy) {
+ // Inserting null doesn't actually insert any elements.
+ if (Constant *C = dyn_cast<Constant>(V))
+ if (C->isNullValue())
+ return true;
+
+ // Fail if multiple elements are inserted into this slot.
+ if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+ return false;
+
+ Elements[ElementIndex] = V;
+ return true;
+ }
+
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ // Figure out the # elements this provides, and bitcast it or slice it up
+ // as required.
+ unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
+ VecEltTy);
+ // If the constant is the size of a vector element, we just need to bitcast
+ // it to the right type so it gets properly inserted.
+ if (NumElts == 1)
+ return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
+ ElementIndex, Elements, VecEltTy);
+
+ // Okay, this is a constant that covers multiple elements. Slice it up into
+ // pieces and insert each element-sized piece into the vector.
+ if (!isa<IntegerType>(C->getType()))
+ C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
+ C->getType()->getPrimitiveSizeInBits()));
+ unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
+ const Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
+ i*ElementSize));
+ Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
+ if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+ return false;
+ }
+ return true;
+ }
+
+ if (!V->hasOneUse()) return false;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return false;
+ switch (I->getOpcode()) {
+ default: return false; // Unhandled case.
+ case Instruction::BitCast:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::ZExt:
+ if (!isMultipleOfTypeSize(
+ I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
+ VecEltTy))
+ return false;
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Or:
+ return CollectInsertionElements(I->getOperand(0), ElementIndex,
+ Elements, VecEltTy) &&
+ CollectInsertionElements(I->getOperand(1), ElementIndex,
+ Elements, VecEltTy);
+ case Instruction::Shl: {
+ // Must be shifting by a constant that is a multiple of the element size.
+ ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+ if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
+ unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
+
+ return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
+ Elements, VecEltTy);
+ }
+
+ }
+}
+
+
+/// OptimizeIntegerToVectorInsertions - If the input is an 'or' instruction, we
+/// may be doing shifts and ors to assemble the elements of the vector manually.
+/// Try to rip the code out and replace it with insertelements. This is to
+/// optimize code like this:
+///
+/// %tmp37 = bitcast float %inc to i32
+/// %tmp38 = zext i32 %tmp37 to i64
+/// %tmp31 = bitcast float %inc5 to i32
+/// %tmp32 = zext i32 %tmp31 to i64
+/// %tmp33 = shl i64 %tmp32, 32
+/// %ins35 = or i64 %tmp33, %tmp38
+/// %tmp43 = bitcast i64 %ins35 to <2 x float>
+///
+/// Into two insertelements that do "buildvector{%inc, %inc5}".
+static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
+ InstCombiner &IC) {
+ const VectorType *DestVecTy = cast<VectorType>(CI.getType());
+ Value *IntInput = CI.getOperand(0);
+
+ SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
+ if (!CollectInsertionElements(IntInput, 0, Elements,
+ DestVecTy->getElementType()))
+ return 0;
+
+ // If we succeeded, we know that all of the elements are specified by Elements
+ // or are zero if Elements has a null entry. Recast this as a set of
+ // insertions.
+ Value *Result = Constant::getNullValue(CI.getType());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
+ if (Elements[i] == 0) continue; // Unset element.
+
+ Result = IC.Builder->CreateInsertElement(Result, Elements[i],
+ IC.Builder->getInt32(i));
+ }
+
+ return Result;
+}
+
+
+/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double
+/// bitcast. The various long double bitcasts can't get in here.
+static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
+ Value *Src = CI.getOperand(0);
+ const Type *DestTy = CI.getType();
+
+ // If this is a bitcast from int to float, check to see if the int is an
+ // extraction from a vector.
+ Value *VecInput = 0;
+ // bitcast(trunc(bitcast(somevector)))
+ if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0));
+ }
+ }
+
+ // bitcast(trunc(lshr(bitcast(somevector), cst)))
+ ConstantInt *ShAmt = 0;
+ if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
+ m_ConstantInt(ShAmt)))) &&
+ isa<VectorType>(VecInput->getType())) {
+ const VectorType *VecTy = cast<VectorType>(VecInput->getType());
+ unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
+ if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 &&
+ ShAmt->getZExtValue() % DestWidth == 0) {
+ // If the element type of the vector doesn't match the result type,
+ // bitcast it to be a vector type we can extract from.
+ if (VecTy->getElementType() != DestTy) {
+ VecTy = VectorType::get(DestTy,
+ VecTy->getPrimitiveSizeInBits() / DestWidth);
+ VecInput = IC.Builder->CreateBitCast(VecInput, VecTy);
+ }
+
+ unsigned Elt = ShAmt->getZExtValue() / DestWidth;
+ return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
+ }
+ }
+ return 0;
+}
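
The second pattern above, bitcast(trunc(lshr(bitcast(somevector), cst))),
selects a single lane of the vector. A host-level illustration, assuming a
little-endian layout so that lane 1 occupies the high 32 bits of the i64:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      float v[2] = {1.5f, -2.25f};
      uint64_t bits;
      std::memcpy(&bits, v, sizeof bits); // bitcast <2 x float> -> i64
      uint32_t hi = uint32_t(bits >> 32); // lshr by 32, trunc to i32
      float f;
      std::memcpy(&f, &hi, sizeof f);     // bitcast i32 -> float
      assert(f == v[1]);                  // same as extractelement, lane 1
      return 0;
    }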
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
@@ -1359,6 +1611,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
((Instruction*)NULL));
}
}
+
+ // Try to optimize int -> float bitcasts.
+ if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy))
+ if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this))
+ return I;
if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
@@ -1368,16 +1625,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
- // If this is a cast from an integer to vector, check to see if the input
- // is a trunc or zext of a bitcast from vector. If so, we can replace all
- // the casts with a shuffle and (potentially) a bitcast.
- if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
- CastInst *SrcCast = cast<CastInst>(Src);
- if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
- if (isa<VectorType>(BCIn->getOperand(0)->getType()))
- if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ if (isa<IntegerType>(SrcTy)) {
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
cast<VectorType>(DestTy), *this))
- return I;
+ return I;
+ }
+
+ // If the input is an 'or' instruction, we may be doing shifts and ors to
+ // assemble the elements of the vector manually. Try to rip the code out
+ // and replace it with insertelements.
+ if (Value *V = OptimizeIntegerToVectorInsertions(CI, *this))
+ return ReplaceInstUsesWith(CI, V);
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6c00586412ac1..d7e2b72b7fac5 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1374,7 +1374,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
case Instruction::Or:
// If bits are being or'd in that are not present in the constant we
// are comparing against, then the comparison could never succeed!
- if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+ if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
Constant *NotCI = ConstantExpr::getNot(RHS);
if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
return ReplaceInstUsesWith(ICI,
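
The surrounding reasoning checks that BOC & ~RHS is non-zero: any bit or'd in
that the comparison constant lacks makes equality impossible. With BOC = 4 and
RHS = 3 as illustrative values:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 256; ++X)
        assert((X | 4) != 3); // bit 2 is always set on the left, never in 3
      return 0;
    }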
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 8933a0b137ab4..b68fbc2db5c9a 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -146,10 +146,14 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
if (TD) {
unsigned KnownAlign =
GetOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()));
- if (KnownAlign >
- (LI.getAlignment() == 0 ? TD->getABITypeAlignment(LI.getType()) :
- LI.getAlignment()))
+ unsigned LoadAlign = LI.getAlignment();
+ unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
+ TD->getABITypeAlignment(LI.getType());
+
+ if (KnownAlign > EffectiveLoadAlign)
LI.setAlignment(KnownAlign);
+ else if (LoadAlign == 0)
+ LI.setAlignment(EffectiveLoadAlign);
}
// load (cast X) --> cast (load X) iff safe.
@@ -369,7 +373,7 @@ DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U))
return DI;
if (isa<BitCastInst>(U) && U->hasOneUse()) {
- if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U->use_begin()))
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(*U->use_begin()))
return DI;
}
}
@@ -411,10 +415,14 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (TD) {
unsigned KnownAlign =
GetOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()));
- if (KnownAlign >
- (SI.getAlignment() == 0 ? TD->getABITypeAlignment(Val->getType()) :
- SI.getAlignment()))
+ unsigned StoreAlign = SI.getAlignment();
+ unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
+ TD->getABITypeAlignment(Val->getType());
+
+ if (KnownAlign > EffectiveStoreAlign)
SI.setAlignment(KnownAlign);
+ else if (StoreAlign == 0)
+ SI.setAlignment(EffectiveStoreAlign);
}
// Do really simple DSE, to catch cases where there are several consecutive
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index f9ffdb10f2660..c44fe9db6e3a7 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -699,34 +699,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
SI.setOperand(2, TrueVal);
return &SI;
}
-
- // select (A == 0 | B == 0), T, F--> select (A != 0 & B != 0), F, T
- // Note: This is a canonicalization rather than an optimization, and is used
- // to expose opportunities to other instcombine transforms.
- Instruction* CondInst = dyn_cast<Instruction>(CondVal);
- if (CondInst && CondInst->hasOneUse() &&
- CondInst->getOpcode() == Instruction::Or) {
- ICmpInst *LHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(0));
- ICmpInst *RHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(1));
- if (LHSCmp && LHSCmp->hasOneUse() &&
- LHSCmp->getPredicate() == ICmpInst::ICMP_EQ &&
- RHSCmp && RHSCmp->hasOneUse() &&
- RHSCmp->getPredicate() == ICmpInst::ICMP_EQ) {
- ConstantInt* C1 = dyn_cast<ConstantInt>(LHSCmp->getOperand(1));
- ConstantInt* C2 = dyn_cast<ConstantInt>(RHSCmp->getOperand(1));
- if (C1 && C1->isZero() && C2 && C2->isZero()) {
- LHSCmp->setPredicate(ICmpInst::ICMP_NE);
- RHSCmp->setPredicate(ICmpInst::ICMP_NE);
- Value *And =
- InsertNewInstBefore(BinaryOperator::CreateAnd(LHSCmp, RHSCmp,
- "and."+CondVal->getName()), SI);
- SI.setOperand(0, And);
- SI.setOperand(1, FalseVal);
- SI.setOperand(2, TrueVal);
- return &SI;
- }
- }
- }
return 0;
}
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index e5ce8a612f3f6..27716b886a226 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -56,10 +56,270 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
return 0;
}
+/// CanEvaluateShifted - See if we can compute the specified value, but shifted
+/// logically to the left or right by some number of bits. This should return
+/// true if the expression can be computed for the same cost as the current
+/// expression tree. This is used to eliminate extraneous shifting from things
+/// like:
+/// %C = shl i128 %A, 64
+/// %D = shl i128 %B, 96
+/// %E = or i128 %C, %D
+/// %F = lshr i128 %E, 64
+/// where the client will ask if E can be computed shifted right by 64 bits. If
+/// this succeeds, the GetShiftedValue function will be called to produce the
+/// value.
+static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (isa<Constant>(V))
+ return true;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return false;
+
+ // If this is the opposite shift, we can directly reuse the input of the shift
+ // if the needed bits are already zero in the input. This allows us to reuse
+ // the value, which means that we don't care if the shift has multiple uses.
+ // TODO: Handle opposite shift by exact value.
+ ConstantInt *CI;
+ if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
+ (!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
+ if (CI->getZExtValue() == NumBits) {
+ // TODO: Check that the input bits are already zero with MaskedValueIsZero
+#if 0
+ // If this is a truncate of a logical shr, we can truncate it to a smaller
+ // lshr iff we know that the bits we would otherwise be shifting in are
+ // already zeros.
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
+ CI->getLimitedValue(BitWidth) < BitWidth) {
+ return CanEvaluateTruncated(I->getOperand(0), Ty);
+ }
+#endif
+
+ }
+ }
+
+ // We can't mutate something that has multiple uses: doing so would
+ // require duplicating the instruction in general, which isn't profitable.
+ if (!I->hasOneUse()) return false;
+
+ switch (I->getOpcode()) {
+ default: return false;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all be evaluated shifted.
+ return CanEvaluateShifted(I->getOperand(0), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(I->getOperand(1), NumBits, isLeftShift, IC);
+
+ case Instruction::Shl: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) return true;
+
+ // We can always turn shl(c)+shr(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned HighBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getHighBitsSet(TypeWidth, HighBits)))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::LShr: {
+ // We can often fold the shift into shifts-by-a-constant.
+ CI = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (CI == 0) return false;
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) return true;
+
+ // We can always turn lshr(c)+shl(c) -> and(c2).
+ if (CI->getValue() == NumBits) return true;
+
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We can turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but it isn't
+ // profitable unless we know the and'd out bits are already zero.
+ if (CI->getZExtValue() > NumBits) {
+ unsigned LowBits = CI->getZExtValue() - NumBits;
+ if (MaskedValueIsZero(I->getOperand(0),
+ APInt::getLowBitsSet(TypeWidth, LowBits)))
+ return true;
+ }
+
+ return false;
+ }
+ case Instruction::Select: {
+ SelectInst *SI = cast<SelectInst>(I);
+ return CanEvaluateShifted(SI->getTrueValue(), NumBits, isLeftShift, IC) &&
+ CanEvaluateShifted(SI->getFalseValue(), NumBits, isLeftShift, IC);
+ }
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (!CanEvaluateShifted(PN->getIncomingValue(i), NumBits, isLeftShift,IC))
+ return false;
+ return true;
+ }
+ }
+}
+
+/// GetShiftedValue - When CanEvaluateShifted returned true for an expression,
+/// this function inserts the new computation that produces the shifted value.
+static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
+ InstCombiner &IC) {
+ // We can always evaluate constants shifted.
+ if (Constant *C = dyn_cast<Constant>(V)) {
+ if (isLeftShift)
+ V = IC.Builder->CreateShl(C, NumBits);
+ else
+ V = IC.Builder->CreateLShr(C, NumBits);
+ // If we got a constantexpr back, try to simplify it with TD info.
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+ V = ConstantFoldConstantExpression(CE, IC.getTargetData());
+ return V;
+ }
+
+ Instruction *I = cast<Instruction>(V);
+ IC.Worklist.Add(I);
+
+ switch (I->getOpcode()) {
+ default: assert(0 && "Inconsistency with CanEvaluateShifted");
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ // Bitwise operators can all be evaluated shifted.
+ I->setOperand(0, GetShiftedValue(I->getOperand(0), NumBits,isLeftShift,IC));
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ return I;
+
+ case Instruction::Shl: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
+ if (isLeftShift) {
+ // If this is an oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn shl(c)+lshr(c) -> and(c2); the mask is needed in case the input
+ // doesn't already have zeros in the shifted-out bits.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getLowBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn shl(c1)+shr(c2) -> shl(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+ case Instruction::LShr: {
+ unsigned TypeWidth = I->getType()->getScalarSizeInBits();
+ // We only accept shifts-by-a-constant in CanEvaluateShifted.
+ ConstantInt *CI = cast<ConstantInt>(I->getOperand(1));
+
+ // We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
+ if (!isLeftShift) {
+ // If this is an oversized composite shift, then unsigned shifts get 0.
+ unsigned NewShAmt = NumBits+CI->getZExtValue();
+ if (NewShAmt >= TypeWidth)
+ return Constant::getNullValue(I->getType());
+
+ I->setOperand(1, ConstantInt::get(I->getType(), NewShAmt));
+ return I;
+ }
+
+ // We turn lshr(c)+shl(c) -> and(c2); the mask is needed in case the input
+ // doesn't already have zeros in the shifted-out bits.
+ if (CI->getValue() == NumBits) {
+ APInt Mask(APInt::getHighBitsSet(TypeWidth, TypeWidth - NumBits));
+ V = IC.Builder->CreateAnd(I->getOperand(0),
+ ConstantInt::get(I->getContext(), Mask));
+ if (Instruction *VI = dyn_cast<Instruction>(V)) {
+ VI->moveBefore(I);
+ VI->takeName(I);
+ }
+ return V;
+ }
+
+ // We turn lshr(c1)+shl(c2) -> lshr(c3)+and(c4), but only when we know that
+ // the and won't be needed.
+ assert(CI->getZExtValue() > NumBits);
+ I->setOperand(1, ConstantInt::get(I->getType(),
+ CI->getZExtValue() - NumBits));
+ return I;
+ }
+
+ case Instruction::Select:
+ I->setOperand(1, GetShiftedValue(I->getOperand(1), NumBits,isLeftShift,IC));
+ I->setOperand(2, GetShiftedValue(I->getOperand(2), NumBits,isLeftShift,IC));
+ return I;
+ case Instruction::PHI: {
+ // We can change a phi if we can change all operands. Note that we never
+ // get into trouble with cyclic PHIs here because we only consider
+ // instructions with a single use.
+ PHINode *PN = cast<PHINode>(I);
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ PN->setIncomingValue(i, GetShiftedValue(PN->getIncomingValue(i),
+ NumBits, isLeftShift, IC));
+ return PN;
+ }
+ }
+}
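
The equal-amount cases above collapse a shift pair into a single mask. Both
directions, checked for a shift of 3 on i32 over a sample of bit patterns:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t v = 0; v < (1u << 16); ++v) {
        uint32_t x = v * 2654435761u; // scatter bits across the word
        assert(((x << 3) >> 3) == (x & (0xFFFFFFFFu >> 3))); // shl+lshr
        assert(((x >> 3) << 3) == (x & (0xFFFFFFFFu << 3))); // lshr+shl
      }
      return 0;
    }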
+
+
+
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
BinaryOperator &I) {
bool isLeftShift = I.getOpcode() == Instruction::Shl;
-
+
+
+ // See if we can propagate this shift into the input; this covers the
+ // trivial case of lshr(shl(x,c1),c2) as well as other more complex cases.
+ if (I.getOpcode() != Instruction::AShr &&
+ CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+ DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
+ " to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
+
+ return ReplaceInstUsesWith(I,
+ GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+ }
+
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
@@ -288,39 +548,17 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
ConstantInt::get(Ty, AmtSum));
}
- if (ShiftOp->getOpcode() == Instruction::LShr &&
- I.getOpcode() == Instruction::AShr) {
- if (AmtSum >= TypeBits)
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // ((X >>u C1) >>s C2) -> (X >>u (C1+C2)) since C1 != 0.
- return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, AmtSum));
- }
-
- if (ShiftOp->getOpcode() == Instruction::AShr &&
- I.getOpcode() == Instruction::LShr) {
- // ((X >>s C1) >>u C2) -> ((X >>s (C1+C2)) & mask) since C1 != 0.
- if (AmtSum >= TypeBits)
- AmtSum = TypeBits-1;
-
- Value *Shift = Builder->CreateAShr(X, ConstantInt::get(Ty, AmtSum));
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift,
- ConstantInt::get(I.getContext(), Mask));
- }
-
- // Okay, if we get here, one shift must be left, and the other shift must be
- // right. See if the amounts are equal.
if (ShiftAmt1 == ShiftAmt2) {
// If we have ((X >>? C) << C), turn this into X & (-1 << C).
- if (I.getOpcode() == Instruction::Shl) {
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
APInt Mask(APInt::getHighBitsSet(TypeBits, TypeBits - ShiftAmt1));
return BinaryOperator::CreateAnd(X,
ConstantInt::get(I.getContext(),Mask));
}
// If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
- if (I.getOpcode() == Instruction::LShr) {
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
return BinaryOperator::CreateAnd(X,
ConstantInt::get(I.getContext(), Mask));
@@ -329,7 +567,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
// (X >>? C1) << C2 --> X << (C2-C1) & (-1 << C2)
- if (I.getOpcode() == Instruction::Shl) {
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::LShr ||
ShiftOp->getOpcode() == Instruction::AShr);
Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
@@ -340,7 +579,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
}
// (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr) {
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
assert(ShiftOp->getOpcode() == Instruction::Shl);
Value *Shift = Builder->CreateLShr(X, ConstantInt::get(Ty, ShiftDiff));
@@ -355,9 +595,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
// (X >>? C1) << C2 --> X >>? (C1-C2) & (-1 << C2)
- if (I.getOpcode() == Instruction::Shl) {
- assert(ShiftOp->getOpcode() == Instruction::LShr ||
- ShiftOp->getOpcode() == Instruction::AShr);
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl) {
Value *Shift = Builder->CreateBinOp(ShiftOp->getOpcode(), X,
ConstantInt::get(Ty, ShiftDiff));
@@ -367,8 +606,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
}
// (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr) {
- assert(ShiftOp->getOpcode() == Instruction::Shl);
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
Value *Shift = Builder->CreateShl(X, ConstantInt::get(Ty, ShiftDiff));
APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
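
And the unequal-amount case in this hunk, (X << C1) >>u C2 with C1 > C2,
becomes a smaller left shift plus a mask. Checked with C1 = 5, C2 = 2
(illustrative values):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t v = 0; v < (1u << 16); ++v) {
        uint32_t X = v * 2246822519u; // scatter bits across the word
        assert(((X << 5) >> 2) == ((X << 3) & (0xFFFFFFFFu >> 2)));
      }
      return 0;
    }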
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index af2958fe3d910..e46c67994e2b0 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -60,8 +60,8 @@ STATISTIC(NumSunkInst , "Number of instructions sunk");
char InstCombiner::ID = 0;
-static RegisterPass<InstCombiner>
-X("instcombine", "Combine redundant instructions");
+INITIALIZE_PASS(InstCombiner, "instcombine",
+ "Combine redundant instructions", false, false);
void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreservedID(LCSSAID);
diff --git a/lib/Transforms/Instrumentation/EdgeProfiling.cpp b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
index 9ae3786707153..a77d70cd1c1bc 100644
--- a/lib/Transforms/Instrumentation/EdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/EdgeProfiling.cpp
@@ -34,7 +34,7 @@ namespace {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- EdgeProfiler() : ModulePass(&ID) {}
+ EdgeProfiler() : ModulePass(ID) {}
virtual const char *getPassName() const {
return "Edge Profiler";
@@ -43,8 +43,8 @@ namespace {
}
char EdgeProfiler::ID = 0;
-static RegisterPass<EdgeProfiler>
-X("insert-edge-profiling", "Insert instrumentation for edge profiling");
+INITIALIZE_PASS(EdgeProfiler, "insert-edge-profiling",
+ "Insert instrumentation for edge profiling", false, false);
ModulePass *llvm::createEdgeProfilerPass() { return new EdgeProfiler(); }
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 41e3a39f2685b..8eec9872812dc 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -36,7 +36,7 @@ namespace {
bool runOnModule(Module &M);
public:
static char ID; // Pass identification, replacement for typeid
- OptimalEdgeProfiler() : ModulePass(&ID) {}
+ OptimalEdgeProfiler() : ModulePass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(ProfileEstimatorPassID);
@@ -50,9 +50,9 @@ namespace {
}
char OptimalEdgeProfiler::ID = 0;
-static RegisterPass<OptimalEdgeProfiler>
-X("insert-optimal-edge-profiling",
- "Insert optimal instrumentation for edge profiling");
+INITIALIZE_PASS(OptimalEdgeProfiler, "insert-optimal-edge-profiling",
+ "Insert optimal instrumentation for edge profiling",
+ false, false);
ModulePass *llvm::createOptimalEdgeProfilerPass() {
return new OptimalEdgeProfiler();
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
deleted file mode 100644
index dcf14a6860da5..0000000000000
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ /dev/null
@@ -1,1112 +0,0 @@
-//===------- ABCD.cpp - Removes redundant conditional branches ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass removes redundant branch instructions. This algorithm was
-// described by Rastislav Bodik, Rajiv Gupta and Vivek Sarkar in their paper
-// "ABCD: Eliminating Array Bounds Checks on Demand (2000)". The original
-// Algorithm was created to remove array bound checks for strongly typed
-// languages. This implementation expands the idea and removes any conditional
-// branches that can be proved redundant, not only those used in array bound
-// checks. With the SSI representation, each variable has a
-// constraint. By analyzing these constraints we can prove that a branch is
-// redundant. When a branch is proved redundant it means that
-// one direction will always be taken; thus, we can change this branch into an
-// unconditional jump.
-// It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination
-// after ABCD to clean up the code.
-// This implementation is based on the ABCD implementation in the Jitrino
-// compiler.
-//
-//===----------------------------------------------------------------------===//
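
A hand-written illustration of the kind of branch ABCD targets (this
example is editorial, not part of the deleted file): the inner test below
is implied by the loop condition, so ABCD can prove it redundant and turn
it into an unconditional branch, which SimplifyCFG can then fold away.

int sum_first_n(const int *a, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i) {
    if (i < n)              // redundant: dominated by the loop condition
      s += a[i];
  }
  return s;
}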
-
-#define DEBUG_TYPE "abcd"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Constants.h"
-#include "llvm/Function.h"
-#include "llvm/Instructions.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-
-using namespace llvm;
-
-STATISTIC(NumBranchTested, "Number of conditional branches analyzed");
-STATISTIC(NumBranchRemoved, "Number of conditional branches removed");
-
-namespace {
-
-class ABCD : public FunctionPass {
- public:
- static char ID; // Pass identification, replacement for typeid.
- ABCD() : FunctionPass(&ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<SSI>();
- }
-
- bool runOnFunction(Function &F);
-
- private:
- /// Keep track of whether we've modified the program yet.
- bool modified;
-
- enum ProveResult {
- False = 0,
- Reduced = 1,
- True = 2
- };
-
- typedef ProveResult (*meet_function)(ProveResult, ProveResult);
- static ProveResult max(ProveResult res1, ProveResult res2) {
- return (ProveResult) std::max(res1, res2);
- }
- static ProveResult min(ProveResult res1, ProveResult res2) {
- return (ProveResult) std::min(res1, res2);
- }
-
- class Bound {
- public:
- Bound(APInt v, bool upper) : value(v), upper_bound(upper) {}
- Bound(const Bound &b, int cnst)
- : value(b.value - cnst), upper_bound(b.upper_bound) {}
- Bound(const Bound &b, const APInt &cnst)
- : value(b.value - cnst), upper_bound(b.upper_bound) {}
-
- /// Test if Bound is an upper bound
- bool isUpperBound() const { return upper_bound; }
-
- /// Get the bitwidth of this bound
- int32_t getBitWidth() const { return value.getBitWidth(); }
-
- /// Creates a Bound incrementing the one received
- static Bound createIncrement(const Bound &b) {
- return Bound(b.isUpperBound() ? b.value+1 : b.value-1,
- b.upper_bound);
- }
-
- /// Creates a Bound decrementing the one received
- static Bound createDecrement(const Bound &b) {
- return Bound(b.isUpperBound() ? b.value-1 : b.value+1,
- b.upper_bound);
- }
-
- /// Test if two bounds are equal
- static bool eq(const Bound *a, const Bound *b) {
- if (!a || !b) return false;
-
- assert(a->isUpperBound() == b->isUpperBound());
- return a->value == b->value;
- }
-
- /// Test if val is less than or equal to Bound b
- static bool leq(APInt val, const Bound &b) {
- return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value);
- }
-
- /// Test if Bound a is less than or equal to Bound b
- static bool leq(const Bound &a, const Bound &b) {
- assert(a.isUpperBound() == b.isUpperBound());
- return a.isUpperBound() ? a.value.sle(b.value) :
- a.value.sge(b.value);
- }
-
- /// Test if Bound a is less than Bound b
- static bool lt(const Bound &a, const Bound &b) {
- assert(a.isUpperBound() == b.isUpperBound());
- return a.isUpperBound() ? a.value.slt(b.value) :
- a.value.sgt(b.value);
- }
-
- /// Test if Bound b is greater than or equal to val
- static bool geq(const Bound &b, APInt val) {
- return leq(val, b);
- }
-
- /// Test if Bound a is greater than or equal to Bound b
- static bool geq(const Bound &a, const Bound &b) {
- return leq(b, a);
- }
-
- private:
- APInt value;
- bool upper_bound;
- };
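
The flipped comparison convention in Bound is worth spelling out: for an
upper bound, "leq" is the ordinary signed <=, while for a lower bound the
direction reverses. A standalone restatement with plain ints standing in
for APInt (editorial sketch, not part of the deleted file):

#include <cassert>

// Stand-in for Bound::leq(val, b): direction depends on the bound kind.
static bool leq(int val, int boundValue, bool upperBound) {
  return upperBound ? val <= boundValue : val >= boundValue;
}

int main() {
  assert(leq(7, 10, /*upperBound=*/true));   // 7 <= 10
  assert(leq(0, -3, /*upperBound=*/false));  // 0 >= -3
  assert(!leq(11, 10, /*upperBound=*/true)); // 11 > 10
  return 0;
}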
-
- /// This class is used to store results for some parts of the graph,
- /// so information does not need to be recalculated. The maximum false,
- /// minimum true and minimum reduced results are stored
- class MemoizedResultChart {
- public:
- MemoizedResultChart() {}
- MemoizedResultChart(const MemoizedResultChart &other) {
- if (other.max_false)
- max_false.reset(new Bound(*other.max_false));
- if (other.min_true)
- min_true.reset(new Bound(*other.min_true));
- if (other.min_reduced)
- min_reduced.reset(new Bound(*other.min_reduced));
- }
-
- /// Returns the max false
- const Bound *getFalse() const { return max_false.get(); }
-
- /// Returns the min true
- const Bound *getTrue() const { return min_true.get(); }
-
- /// Returns the min reduced
- const Bound *getReduced() const { return min_reduced.get(); }
-
- /// Return the stored result for this bound
- ProveResult getResult(const Bound &bound) const;
-
- /// Stores a newly found False bound
- void addFalse(const Bound &bound);
-
- /// Stores a newly found True bound
- void addTrue(const Bound &bound);
-
- /// Stores a newly found Reduced bound
- void addReduced(const Bound &bound);
-
- /// Clears a redundant min_reduced.
- /// If min_true is smaller than min_reduced then min_reduced
- /// is unnecessary and is removed. The same applies when min_reduced
- /// is smaller than max_false.
- void clearRedundantReduced();
-
- void clear() {
- max_false.reset();
- min_true.reset();
- min_reduced.reset();
- }
-
- private:
- OwningPtr<Bound> max_false, min_true, min_reduced;
- };
-
- /// This class stores the result found for a node of the graph,
- /// so these results do not need to be recalculated, only searched for.
- class MemoizedResult {
- public:
- /// Test if there is a True result stored from b to a
- /// that is less than the bound
- bool hasTrue(Value *b, const Bound &bound) const {
- const Bound *trueBound = map.lookup(b).getTrue();
- return trueBound && Bound::leq(*trueBound, bound);
- }
-
- /// Test if there is a False result stored from b to a
- /// that is less than the bound
- bool hasFalse(Value *b, const Bound &bound) const {
- const Bound *falseBound = map.lookup(b).getFalse();
- return falseBound && Bound::leq(*falseBound, bound);
- }
-
- /// Test if there is a Reduced result stored from b to a
- /// that is less than the bound
- bool hasReduced(Value *b, const Bound &bound) const {
- const Bound *reducedBound = map.lookup(b).getReduced();
- return reducedBound && Bound::leq(*reducedBound, bound);
- }
-
- /// Returns the stored result for b under the given bound
- ProveResult getBoundResult(Value *b, const Bound &bound) {
- return map[b].getResult(bound);
- }
-
- /// Clears the map
- void clear() {
- DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin();
- DenseMapIterator<Value*, MemoizedResultChart> end = map.end();
- for (; begin != end; ++begin) {
- begin->second.clear();
- }
- map.clear();
- }
-
- /// Records the result found for b under the given bound
- void updateBound(Value *b, const Bound &bound, const ProveResult res);
-
- private:
- // Maps a node in the graph to the results found for it.
- DenseMap<Value*, MemoizedResultChart> map;
- };
-
- /// This class represents an edge in the inequality graph used by the
- /// ABCD algorithm. An edge connects node v to node u with a value c if
- /// we could infer a constraint v <= u + c in the source program.
- class Edge {
- public:
- Edge(Value *V, APInt val, bool upper)
- : vertex(V), value(val), upper_bound(upper) {}
-
- Value *getVertex() const { return vertex; }
- const APInt &getValue() const { return value; }
- bool isUpperBound() const { return upper_bound; }
-
- private:
- Value *vertex;
- APInt value;
- bool upper_bound;
- };
-
- /// Weighted and Directed graph to represent constraints.
- /// There is one type of constraint, a <= b + X, which will generate an
- /// edge from b to a with weight X.
- class InequalityGraph {
- public:
-
- /// Adds an edge from V_from to V_to with weight value
- void addEdge(Value *V_from, Value *V_to, APInt value, bool upper);
-
- /// Test if there is a node V
- bool hasNode(Value *V) const { return graph.count(V); }
-
- /// Test if there is any edge from V in the upper direction
- bool hasEdge(Value *V, bool upper) const;
-
- /// Returns all edges leaving vertex V
- SmallVector<Edge, 16> getEdges(Value *V) const {
- return graph.lookup(V);
- }
-
- /// Prints the graph in dot format.
- /// Blue edges represent upper bounds and red edges lower bounds.
- void printGraph(raw_ostream &OS, Function &F) const {
- printHeader(OS, F);
- printBody(OS);
- printFooter(OS);
- }
-
- /// Clear the graph
- void clear() {
- graph.clear();
- }
-
- private:
- DenseMap<Value *, SmallVector<Edge, 16> > graph;
-
- /// Prints the header of the dot file
- void printHeader(raw_ostream &OS, Function &F) const;
-
- /// Prints the footer of the dot file
- void printFooter(raw_ostream &OS) const {
- OS << "}\n";
- }
-
- /// Prints the body of the dot file
- void printBody(raw_ostream &OS) const;
-
- /// Prints vertex source to the dot file
- void printVertex(raw_ostream &OS, Value *source) const;
-
- /// Prints the edge to the dot file
- void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const;
-
- void printName(raw_ostream &OS, Value *info) const;
- };
-
- /// Iterates through all BasicBlocks; if the Terminator Instruction
- /// uses a Comparator Instruction, all operands of this comparator
- /// are sent to be transformed to SSI. Only Instruction operands are
- /// transformed.
- void createSSI(Function &F);
-
- /// Creates the graphs for this function.
- /// It looks for all comparators used in branches and creates constraints
- /// from them. These comparators create constraints for any instruction
- /// that appears as an operand.
- void executeABCD(Function &F);
-
- /// Seeks redundancies in the comparator instruction ICI.
- /// If the ABCD algorithm can prove that the comparator ICI always
- /// takes one way, then the Terminator Instruction TI is changed from
- /// a conditional branch to an unconditional one.
- /// This code basically receives a comparator, and verifies which kind of
- /// instruction it is. Depending on the kind of instruction, we use different
- /// strategies to prove its redundancy.
- void seekRedundancy(ICmpInst *ICI, TerminatorInst *TI);
-
- /// Substitutes Terminator Instruction TI, which is a conditional branch,
- /// with an unconditional branch. Succ_edge determines if the new
- /// unconditional edge will be the first or second edge of the former TI
- /// instruction.
- void removeRedundancy(TerminatorInst *TI, bool Succ_edge);
-
- /// When a conditional branch is removed, the BasicBlock that is no longer
- /// reachable will have problems in phi functions. This method fixes these
- /// phis by removing the former BasicBlock from the list of incoming
- /// BasicBlocks of all phis. In case a phi is left with no predecessors it
- /// will be marked to be removed later.
- void fixPhi(BasicBlock *BB, BasicBlock *Succ);
-
- /// Removes phis that have no predecessor
- void removePhis();
-
- /// Creates constraints for Instructions.
- /// If the constraint for this instruction has already been created
- /// nothing is done.
- void createConstraintInstruction(Instruction *I);
-
- /// Creates constraints for Binary Operators.
- /// It will create constraints only for addition and subtraction,
- /// the other binary operations are not treated by ABCD.
- /// For additions in the form a = b + X and a = X + b, where X is a constant,
- /// the constraint a <= b + X can be obtained. For this constraint, an edge
- /// a->b with weight X is added to the lower bound graph, and an edge
- /// b->a with weight -X is added to the upper bound graph.
- /// Only subtractions in the format a = b - X are used by ABCD.
- /// Edges are created using the same semantic as addition.
- void createConstraintBinaryOperator(BinaryOperator *BO);
-
- /// Creates constraints for Comparator Instructions.
- /// Only comparators that have any of the following operators
- /// are used to create constraints: >=, >, <=, <. And only if
- /// at least one operand is an Instruction. In a Comparator Instruction
- /// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where
- /// t and f represent sigma for operands in true and false branches. The
- /// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and
- /// b_f <= b. There are two more constraints that depend on the operator.
- /// For the operator <= : a_t <= b_t and b_f <= a_f-1
- /// For the operator < : a_t <= b_t-1 and b_f <= a_f
- /// For the operator >= : b_t <= a_t and a_f <= b_f-1
- /// For the operator > : b_t <= a_t-1 and a_f <= b_f
- void createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI);
-
- /// Creates constraints for PHI nodes.
- /// In a PHI node a = phi(b,c) we can create the constraint
- /// a<= max(b,c). With this constraint there will be the edges,
- /// b->a and c->a with weight 0 in the lower bound graph, and the edges
- /// a->b and a->c with weight 0 in the upper bound graph.
- void createConstraintPHINode(PHINode *PN);
-
- /// Given a binary operator, we are only interested in the case
- /// that one operand is an Instruction and the other is a ConstantInt. In
- /// this case the method returns true, otherwise false. It also obtains the
- /// Instruction and ConstantInt from the BinaryOperator and returns them.
- bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2);
-
- /// This method creates a constraint between a Sigma and an Instruction.
- /// These constraints are created as soon as we find a comparator that uses
- /// an SSI variable.
- void createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
- BasicBlock *BB_succ_f, PHINode **SIG_op_t,
- PHINode **SIG_op_f);
-
- /// If PN_op1 and PN_op2 are different from NULL, create a constraint
- /// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
- /// with the respective V_op#, if V_op# is a ConstantInt.
- void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
- ConstantInt *V_op1, ConstantInt *V_op2,
- APInt value);
-
- /// Returns the sigma representing the Instruction I in BasicBlock BB.
- /// Returns NULL in case there is no sigma for this Instruction in this
- /// Basic Block. This method assumes that sigmas are the first instructions
- /// in a block, and that there can be only two sigmas in a block. So it will
- /// only look on the first two instructions of BasicBlock BB.
- PHINode *findSigma(BasicBlock *BB, Instruction *I);
-
- /// Original ABCD algorithm to prove redundant checks.
- /// This implementation works on any kind of inequality branch.
- bool demandProve(Value *a, Value *b, int c, bool upper_bound);
-
- /// Proves that the distance between b and a is <= bound
- ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level);
-
- /// Updates the distance value for a and b
- void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level,
- meet_function meet);
-
- InequalityGraph inequality_graph;
- MemoizedResult mem_result;
- DenseMap<Value*, const Bound*> active;
- SmallPtrSet<Value*, 16> created;
- SmallVector<PHINode *, 16> phis_to_remove;
-};
-
-} // end anonymous namespace.
-
-char ABCD::ID = 0;
-static RegisterPass<ABCD> X("abcd", "ABCD: Eliminating Array Bounds Checks on Demand");
-
-
-bool ABCD::runOnFunction(Function &F) {
- modified = false;
- createSSI(F);
- executeABCD(F);
- DEBUG(inequality_graph.printGraph(dbgs(), F));
- removePhis();
-
- inequality_graph.clear();
- mem_result.clear();
- active.clear();
- created.clear();
- phis_to_remove.clear();
- return modified;
-}
-
-/// Iterates through all BasicBlocks; if the Terminator Instruction
-/// uses a Comparator Instruction, all operands of this comparator
-/// are sent to be transformed to SSI. Only Instruction operands are
-/// transformed.
-void ABCD::createSSI(Function &F) {
- SSI *ssi = &getAnalysis<SSI>();
-
- SmallVector<Instruction *, 16> Insts;
-
- for (Function::iterator begin = F.begin(), end = F.end();
- begin != end; ++begin) {
- BasicBlock *BB = begin;
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumOperands() == 0)
- continue;
-
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0))) {
- if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(0))) {
- modified = true; // XXX: though createSSI might do nothing
- Insts.push_back(I);
- }
- if (Instruction *I = dyn_cast<Instruction>(ICI->getOperand(1))) {
- modified = true;
- Insts.push_back(I);
- }
- }
- }
- ssi->createSSI(Insts);
-}
-
-/// Creates the graphs for this function.
-/// It looks for all comparators used in branches and creates constraints
-/// from them. These comparators create constraints for any instruction
-/// that appears as an operand.
-void ABCD::executeABCD(Function &F) {
- for (Function::iterator begin = F.begin(), end = F.end();
- begin != end; ++begin) {
- BasicBlock *BB = begin;
- TerminatorInst *TI = BB->getTerminator();
- if (TI->getNumOperands() == 0)
- continue;
-
- ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0));
- if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy())
- continue;
-
- createConstraintCmpInst(ICI, TI);
- seekRedundancy(ICI, TI);
- }
-}
-
-/// Seeks redundancies in the comparator instruction ICI.
-/// If the ABCD algorithm can prove that the comparator ICI always
-/// takes one way, then the Terminator Instruction TI is changed from
-/// a conditional branch to an unconditional one.
-/// This code basically receives a comparator, and verifies which kind of
-/// instruction it is. Depending on the kind of instruction, we use different
-/// strategies to prove its redundancy.
-void ABCD::seekRedundancy(ICmpInst *ICI, TerminatorInst *TI) {
- CmpInst::Predicate Pred = ICI->getPredicate();
-
- Value *source, *dest;
- int distance1, distance2;
- bool upper;
-
- switch(Pred) {
- case CmpInst::ICMP_SGT: // signed greater than
- upper = false;
- distance1 = 1;
- distance2 = 0;
- break;
-
- case CmpInst::ICMP_SGE: // signed greater or equal
- upper = false;
- distance1 = 0;
- distance2 = -1;
- break;
-
- case CmpInst::ICMP_SLT: // signed less than
- upper = true;
- distance1 = -1;
- distance2 = 0;
- break;
-
- case CmpInst::ICMP_SLE: // signed less or equal
- upper = true;
- distance1 = 0;
- distance2 = 1;
- break;
-
- default:
- return;
- }
-
- ++NumBranchTested;
- source = ICI->getOperand(0);
- dest = ICI->getOperand(1);
- if (demandProve(dest, source, distance1, upper)) {
- removeRedundancy(TI, true);
- } else if (demandProve(dest, source, distance2, !upper)) {
- removeRedundancy(TI, false);
- }
-}
-
-/// Substitutes Terminator Instruction TI, which is a conditional branch,
-/// with an unconditional branch. Succ_edge determines if the new
-/// unconditional edge will be the first or second edge of the former TI
-/// instruction.
-void ABCD::removeRedundancy(TerminatorInst *TI, bool Succ_edge) {
- BasicBlock *Succ;
- if (Succ_edge) {
- Succ = TI->getSuccessor(0);
- fixPhi(TI->getParent(), TI->getSuccessor(1));
- } else {
- Succ = TI->getSuccessor(1);
- fixPhi(TI->getParent(), TI->getSuccessor(0));
- }
-
- BranchInst::Create(Succ, TI);
- TI->eraseFromParent(); // XXX: invoke
- ++NumBranchRemoved;
- modified = true;
-}
-
-/// When a conditional branch is removed, the BasicBlock that is no longer
-/// reachable will have problems in phi functions. This method fixes these
-/// phis by removing the former BasicBlock from the list of incoming
-/// BasicBlocks of all phis. In case a phi is left with no predecessors it
-/// will be marked to be removed later.
-void ABCD::fixPhi(BasicBlock *BB, BasicBlock *Succ) {
- BasicBlock::iterator begin = Succ->begin();
- while (PHINode *PN = dyn_cast<PHINode>(begin++)) {
- PN->removeIncomingValue(BB, false);
- if (PN->getNumIncomingValues() == 0)
- phis_to_remove.push_back(PN);
- }
-}
-
-/// Removes phis that have no predecessor
-void ABCD::removePhis() {
- for (unsigned i = 0, e = phis_to_remove.size(); i != e; ++i) {
- PHINode *PN = phis_to_remove[i];
- PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
- PN->eraseFromParent();
- }
-}
-
-/// Creates constraints for Instructions.
-/// If the constraint for this instruction has already been created
-/// nothing is done.
-void ABCD::createConstraintInstruction(Instruction *I) {
- // Test if this instruction has not been created before
- if (created.insert(I)) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
- createConstraintBinaryOperator(BO);
- } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
- createConstraintPHINode(PN);
- }
- }
-}
-
-/// Creates constraints for Binary Operators.
-/// It will create constraints only for addition and subtraction,
-/// the other binary operations are not treated by ABCD.
-/// For additions in the form a = b + X and a = X + b, where X is a constant,
-/// the constraint a <= b + X can be obtained. For this constraint, an edge
-/// a->b with weight X is added to the lower bound graph, and an edge
-/// b->a with weight -X is added to the upper bound graph.
-/// Only subtractions in the format a = b - X are used by ABCD.
-/// Edges are created using the same semantic as addition.
-void ABCD::createConstraintBinaryOperator(BinaryOperator *BO) {
- Instruction *I1 = NULL, *I2 = NULL;
- ConstantInt *CI1 = NULL, *CI2 = NULL;
-
- // Test if one operand is an Instruction and the other is a ConstantInt
- if (!createBinaryOperatorInfo(BO, &I1, &I2, &CI1, &CI2))
- return;
-
- Instruction *I = 0;
- APInt value;
-
- switch (BO->getOpcode()) {
- case Instruction::Add:
- if (I1) {
- I = I1;
- value = CI2->getValue();
- } else if (I2) {
- I = I2;
- value = CI1->getValue();
- }
- break;
-
- case Instruction::Sub:
- // Instructions like a = X-b, where X is a constant, are not represented
- // in the graph.
- if (!I1)
- return;
-
- I = I1;
- value = -CI2->getValue();
- break;
-
- default:
- return;
- }
-
- inequality_graph.addEdge(I, BO, value, true);
- inequality_graph.addEdge(BO, I, -value, false);
- createConstraintInstruction(I);
-}
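
Concretely, for a = b + 5 the code above records the constraint a <= b + 5:
an a->b edge of weight 5 in the lower-bound graph and a b->a edge of weight
-5 in the upper-bound graph. A standalone check of the arithmetic those two
edges encode (editorial sketch, not part of the deleted file):

#include <cassert>

int main() {
  for (int b = -10; b <= 10; ++b) {
    int a = b + 5;
    assert(a <= b + 5);  // lower-bound graph: edge a->b, weight 5
    assert(b <= a - 5);  // upper-bound graph: edge b->a, weight -5
  }
  return 0;
}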
-
-/// Given a binary operator, we are only interested in the case
-/// that one operand is an Instruction and the other is a ConstantInt. In
-/// this case the method returns true, otherwise false. It also obtains the
-/// Instruction and ConstantInt from the BinaryOperator and returns them.
-bool ABCD::createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2) {
- Value *op1 = BO->getOperand(0);
- Value *op2 = BO->getOperand(1);
-
- if ((*I1 = dyn_cast<Instruction>(op1))) {
- if ((*C2 = dyn_cast<ConstantInt>(op2)))
- return true; // First is Instruction and second ConstantInt
-
- return false; // Both are Instruction
- } else {
- if ((*C1 = dyn_cast<ConstantInt>(op1)) &&
- (*I2 = dyn_cast<Instruction>(op2)))
- return true; // First is ConstantInt and second Instruction
-
- return false; // Both are not Instruction
- }
-}
-
-/// Creates constraints for Comparator Instructions.
-/// Only comparators that have any of the following operators
-/// are used to create constraints: >=, >, <=, <. And only if
-/// at least one operand is an Instruction. In a Comparator Instruction
-/// a op b, there will be 4 sigma functions a_t, a_f, b_t and b_f. Where
-/// t and f represent sigma for operands in true and false branches. The
-/// following constraints can be obtained. a_t <= a, a_f <= a, b_t <= b and
-/// b_f <= b. There are two more constraints that depend on the operator.
-/// For the operator <= : a_t <= b_t and b_f <= a_f-1
-/// For the operator < : a_t <= b_t-1 and b_f <= a_f
-/// For the operator >= : b_t <= a_t and a_f <= b_f-1
-/// For the operator > : b_t <= a_t-1 and a_f <= b_f
-void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
- Value *V_op1 = ICI->getOperand(0);
- Value *V_op2 = ICI->getOperand(1);
-
- if (!V_op1->getType()->isIntegerTy())
- return;
-
- Instruction *I_op1 = dyn_cast<Instruction>(V_op1);
- Instruction *I_op2 = dyn_cast<Instruction>(V_op2);
-
- // Test if at least one operand is an Instruction
- if (!I_op1 && !I_op2)
- return;
-
- BasicBlock *BB_succ_t = TI->getSuccessor(0);
- BasicBlock *BB_succ_f = TI->getSuccessor(1);
-
- PHINode *SIG_op1_t = NULL, *SIG_op1_f = NULL,
- *SIG_op2_t = NULL, *SIG_op2_f = NULL;
-
- createConstraintSigInst(I_op1, BB_succ_t, BB_succ_f, &SIG_op1_t, &SIG_op1_f);
- createConstraintSigInst(I_op2, BB_succ_t, BB_succ_f, &SIG_op2_t, &SIG_op2_f);
-
- int32_t width = cast<IntegerType>(V_op1->getType())->getBitWidth();
- APInt MinusOne = APInt::getAllOnesValue(width);
- APInt Zero = APInt::getNullValue(width);
-
- CmpInst::Predicate Pred = ICI->getPredicate();
- ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);
- ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);
- switch (Pred) {
- case CmpInst::ICMP_SGT: // signed greater than
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero);
- break;
-
- case CmpInst::ICMP_SGE: // signed greater or equal
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne);
- break;
-
- case CmpInst::ICMP_SLT: // signed less than
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero);
- break;
-
- case CmpInst::ICMP_SLE: // signed less or equal
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne);
- break;
-
- default:
- break;
- }
-
- if (I_op1)
- createConstraintInstruction(I_op1);
- if (I_op2)
- createConstraintInstruction(I_op2);
-}
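
The predicate table above reduces to ordinary integer facts. For a signed
a < b, for instance, the true branch yields a_t <= b_t - 1 and the false
branch yields b_f <= a_f. A standalone check of both implications
(editorial sketch, not part of the deleted file):

#include <cassert>

int main() {
  for (int a = -4; a <= 4; ++a)
    for (int b = -4; b <= 4; ++b) {
      if (a < b)
        assert(a <= b - 1);  // constraint on the true edge
      else
        assert(b <= a);      // constraint on the false edge
    }
  return 0;
}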
-
-/// Creates constraints for PHI nodes.
-/// In a PHI node a = phi(b,c) we can create the constraint
-/// a<= max(b,c). With this constraint there will be the edges,
-/// b->a and c->a with weight 0 in the lower bound graph, and the edges
-/// a->b and a->c with weight 0 in the upper bound graph.
-void ABCD::createConstraintPHINode(PHINode *PN) {
- // FIXME: We really want to disallow sigma nodes, but I don't know the best
- // way to detect them other than this.
- if (PN->getNumOperands() == 2) return;
-
- int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *V = PN->getIncomingValue(i);
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- createConstraintInstruction(I);
- }
- inequality_graph.addEdge(V, PN, APInt(width, 0), true);
- inequality_graph.addEdge(V, PN, APInt(width, 0), false);
- }
-}
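
The phi constraint is simply "a is one of its incoming values, hence at
most their maximum". A standalone restatement (editorial sketch, not part
of the deleted file):

#include <algorithm>
#include <cassert>

int main() {
  int b = 3, c = 7;
  for (int pick = 0; pick < 2; ++pick) {
    int a = pick ? b : c;         // a = phi(b, c)
    assert(a <= std::max(b, c));  // what the weight-0 edges encode
  }
  return 0;
}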
-
-/// This method creates a constraint between a Sigma and an Instruction.
-/// These constraints are created as soon as we find a comparator that uses
-/// an SSI variable.
-void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
- BasicBlock *BB_succ_f, PHINode **SIG_op_t,
- PHINode **SIG_op_f) {
- *SIG_op_t = findSigma(BB_succ_t, I_op);
- *SIG_op_f = findSigma(BB_succ_f, I_op);
-
- if (*SIG_op_t) {
- int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
- inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
- inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
- }
- if (*SIG_op_f) {
- int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
- inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
- inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
- }
-}
-
-/// If PN_op1 and PN_op2 are different from NULL, create a constraint
-/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
-/// with the respective V_op#, if V_op# is a ConstantInt.
-void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
- ConstantInt *V_op1, ConstantInt *V_op2,
- APInt value) {
- if (SIG_op1 && SIG_op2) {
- inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
- inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
- } else if (SIG_op1 && V_op2) {
- inequality_graph.addEdge(V_op2, SIG_op1, value, true);
- inequality_graph.addEdge(SIG_op1, V_op2, -value, false);
- } else if (SIG_op2 && V_op1) {
- inequality_graph.addEdge(SIG_op2, V_op1, value, true);
- inequality_graph.addEdge(V_op1, SIG_op2, -value, false);
- }
-}
-
-/// Returns the sigma representing the Instruction I in BasicBlock BB.
-/// Returns NULL in case there is no sigma for this Instruction in this
-/// Basic Block. This method assumes that sigmas are the first instructions
-/// in a block, and that there can be only two sigmas in a block. So it will
-/// only look on the first two instructions of BasicBlock BB.
-PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) {
- // If BB has more than one predecessor, it cannot have sigmas.
- if (I == NULL || BB->getSinglePredecessor() == NULL)
- return NULL;
-
- BasicBlock::iterator begin = BB->begin();
- BasicBlock::iterator end = BB->end();
-
- for (unsigned i = 0; i < 2 && begin != end; ++i, ++begin) {
- Instruction *I_succ = begin;
- if (PHINode *PN = dyn_cast<PHINode>(I_succ))
- if (PN->getIncomingValue(0) == I)
- return PN;
- }
-
- return NULL;
-}
-
-/// Original ABCD algorithm to prove redundant checks.
-/// This implementation works on any kind of inequality branch.
-bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) {
- int32_t width = cast<IntegerType>(a->getType())->getBitWidth();
- Bound bound(APInt(width, c), upper_bound);
-
- mem_result.clear();
- active.clear();
-
- ProveResult res = prove(a, b, bound, 0);
- return res != False;
-}
-
-/// Proves that the distance between b and a is <= bound
-ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound,
- unsigned level) {
- // if (C[b-a<=e] == True for some e <= bound
- // Same or stronger difference was already proven
- if (mem_result.hasTrue(b, bound))
- return True;
-
- // if (C[b-a<=e] == False for some e >= bound
- // Same or weaker difference was already disproved
- if (mem_result.hasFalse(b, bound))
- return False;
-
- // if (C[b-a<=e] == Reduced for some e <= bound
- // b is on a cycle that was reduced for same or stronger difference
- if (mem_result.hasReduced(b, bound))
- return Reduced;
-
- // traversal reached the source vertex
- if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true)))
- return True;
-
- // if b has no predecessor then fail
- if (!inequality_graph.hasEdge(b, bound.isUpperBound()))
- return False;
-
- // a cycle was encountered
- if (active.count(b)) {
- if (Bound::leq(*active.lookup(b), bound))
- return Reduced; // a "harmless" cycle
-
- return False; // an amplifying cycle
- }
-
- active[b] = &bound;
- PHINode *PN = dyn_cast<PHINode>(b);
-
- // Test if a Value is a phi. If it is a PHINode with more than 1 incoming
- // value, then it is a phi; if it has 1 incoming value it is a sigma.
- if (PN && PN->getNumIncomingValues() > 1)
- updateMemDistance(a, b, bound, level, min);
- else
- updateMemDistance(a, b, bound, level, max);
-
- active.erase(b);
-
- ABCD::ProveResult res = mem_result.getBoundResult(b, bound);
- return res;
-}
-
-/// Updates the distance value for a and b
-void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound,
- unsigned level, meet_function meet) {
- ABCD::ProveResult res = (meet == max) ? False : True;
-
- SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b);
- SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end();
-
- for (; begin != end ; ++begin) {
- if (((res >= Reduced) && (meet == max)) ||
- ((res == False) && (meet == min))) {
- break;
- }
- const Edge &in = *begin;
- if (in.isUpperBound() == bound.isUpperBound()) {
- Value *succ = in.getVertex();
- res = meet(res, prove(a, succ, Bound(bound, in.getValue()),
- level+1));
- }
- }
-
- mem_result.updateBound(b, bound, res);
-}
-
-/// Return the stored result for this bound
-ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{
- if (max_false && Bound::leq(bound, *max_false))
- return False;
- if (min_true && Bound::leq(*min_true, bound))
- return True;
- if (min_reduced && Bound::leq(*min_reduced, bound))
- return Reduced;
- return False;
-}
-
-/// Stores a newly found False bound
-void ABCD::MemoizedResultChart::addFalse(const Bound &bound) {
- if (!max_false || Bound::leq(*max_false, bound))
- max_false.reset(new Bound(bound));
-
- if (Bound::eq(max_false.get(), min_reduced.get()))
- min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced)));
- if (Bound::eq(max_false.get(), min_true.get()))
- min_true.reset(new Bound(Bound::createIncrement(*min_true)));
- if (Bound::eq(min_reduced.get(), min_true.get()))
- min_reduced.reset();
- clearRedundantReduced();
-}
-
-/// Stores a newly found True bound
-void ABCD::MemoizedResultChart::addTrue(const Bound &bound) {
- if (!min_true || Bound::leq(bound, *min_true))
- min_true.reset(new Bound(bound));
-
- if (Bound::eq(min_true.get(), min_reduced.get()))
- min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced)));
- if (Bound::eq(min_true.get(), max_false.get()))
- max_false.reset(new Bound(Bound::createDecrement(*max_false)));
- if (Bound::eq(max_false.get(), min_reduced.get()))
- min_reduced.reset();
- clearRedundantReduced();
-}
-
-/// Stores a newly found Reduced bound
-void ABCD::MemoizedResultChart::addReduced(const Bound &bound) {
- if (!min_reduced || Bound::leq(bound, *min_reduced))
- min_reduced.reset(new Bound(bound));
-
- if (Bound::eq(min_reduced.get(), min_true.get()))
- min_true.reset(new Bound(Bound::createIncrement(*min_true)));
- if (Bound::eq(min_reduced.get(), max_false.get()))
- max_false.reset(new Bound(Bound::createDecrement(*max_false)));
-}
-
-/// Clears a redundant min_reduced.
-/// If min_true is smaller than min_reduced then min_reduced
-/// is unnecessary and is removed. The same applies when min_reduced
-/// is smaller than max_false.
-void ABCD::MemoizedResultChart::clearRedundantReduced() {
- if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced))
- min_reduced.reset();
- if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false))
- min_reduced.reset();
-}
-
-/// Records the result found for b under the given bound
-void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound,
- const ProveResult res) {
- if (res == False) {
- map[b].addFalse(bound);
- } else if (res == True) {
- map[b].addTrue(bound);
- } else {
- map[b].addReduced(bound);
- }
-}
-
-/// Adds an edge from V_from to V_to with weight value
-void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from,
- APInt value, bool upper) {
- assert(V_from->getType() == V_to->getType());
- assert(cast<IntegerType>(V_from->getType())->getBitWidth() ==
- value.getBitWidth());
-
- graph[V_from].push_back(Edge(V_to, value, upper));
-}
-
-/// Test if there is any edge from V in the upper direction
-bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const {
- SmallVector<Edge, 16> it = graph.lookup(V);
-
- SmallVector<Edge, 16>::iterator begin = it.begin();
- SmallVector<Edge, 16>::iterator end = it.end();
- for (; begin != end; ++begin) {
- if (begin->isUpperBound() == upper) {
- return true;
- }
- }
- return false;
-}
-
-/// Prints the header of the dot file
-void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
- OS << "digraph dotgraph {\n";
- OS << "label=\"Inequality Graph for \'";
- OS << F.getNameStr() << "\' function\";\n";
- OS << "node [shape=record,fontname=\"Times-Roman\",fontsize=14];\n";
-}
-
-/// Prints the body of the dot file
-void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
- DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin =
- graph.begin(), end = graph.end();
-
- for (; begin != end ; ++begin) {
- SmallVector<Edge, 16>::const_iterator begin_par =
- begin->second.begin(), end_par = begin->second.end();
- Value *source = begin->first;
-
- printVertex(OS, source);
-
- for (; begin_par != end_par ; ++begin_par) {
- const Edge &edge = *begin_par;
- printEdge(OS, source, edge);
- }
- }
-}
-
-/// Prints vertex source to the dot file
-///
-void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const {
- OS << "\"";
- printName(OS, source);
- OS << "\"";
- OS << " [label=\"{";
- printName(OS, source);
- OS << "}\"];\n";
-}
-
-/// Prints the edge to the dot file
-void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source,
- const Edge &edge) const {
- Value *dest = edge.getVertex();
- APInt value = edge.getValue();
- bool upper = edge.isUpperBound();
-
- OS << "\"";
- printName(OS, source);
- OS << "\"";
- OS << " -> ";
- OS << "\"";
- printName(OS, dest);
- OS << "\"";
- OS << " [label=\"" << value << "\"";
- if (upper) {
- OS << "color=\"blue\"";
- } else {
- OS << "color=\"red\"";
- }
- OS << "];\n";
-}
-
-void ABCD::InequalityGraph::printName(raw_ostream &OS, Value *info) const {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(info)) {
- OS << *CI;
- } else {
- if (!info->hasName()) {
- info->setName("V");
- }
- OS << info->getNameStr();
- }
-}
-
-/// createABCDPass - The public interface to this file...
-FunctionPass *llvm::createABCDPass() {
- return new ABCD();
-}
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 2d19467ce7462..ada086e9db766 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -33,7 +33,7 @@ STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
struct ADCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ADCE() : FunctionPass(&ID) {}
+ ADCE() : FunctionPass(ID) {}
virtual bool runOnFunction(Function& F);
@@ -45,7 +45,7 @@ namespace {
}
char ADCE::ID = 0;
-static RegisterPass<ADCE> X("adce", "Aggressive Dead Code Elimination");
+INITIALIZE_PASS(ADCE, "adce", "Aggressive Dead Code Elimination", false, false);
bool ADCE::runOnFunction(Function& F) {
SmallPtrSet<Instruction*, 128> alive;
diff --git a/lib/Transforms/Scalar/BasicBlockPlacement.cpp b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
index 54533f50405f3..b144678c6a0ed 100644
--- a/lib/Transforms/Scalar/BasicBlockPlacement.cpp
+++ b/lib/Transforms/Scalar/BasicBlockPlacement.cpp
@@ -41,7 +41,7 @@ STATISTIC(NumMoved, "Number of basic blocks moved");
namespace {
struct BlockPlacement : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BlockPlacement() : FunctionPass(&ID) {}
+ BlockPlacement() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -74,8 +74,8 @@ namespace {
}
char BlockPlacement::ID = 0;
-static RegisterPass<BlockPlacement>
-X("block-placement", "Profile Guided Basic Block Placement");
+INITIALIZE_PASS(BlockPlacement, "block-placement",
+ "Profile Guided Basic Block Placement", false, false);
FunctionPass *llvm::createBlockPlacementPass() { return new BlockPlacement(); }
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 1a3b10cc9baaf..b7598eace536b 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -1,9 +1,9 @@
add_llvm_library(LLVMScalarOpts
- ABCD.cpp
ADCE.cpp
BasicBlockPlacement.cpp
CodeGenPrepare.cpp
ConstantProp.cpp
+ CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
GEPSplitter.cpp
@@ -17,6 +17,7 @@ add_llvm_library(LLVMScalarOpts
LoopStrengthReduce.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
+ LowerAtomic.cpp
MemCpyOptimizer.cpp
Reassociate.cpp
Reg2Mem.cpp
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 272066c8c0c4b..e07b761e589c1 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
@@ -41,6 +42,11 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+static cl::opt<bool>
+CriticalEdgeSplit("cgp-critical-edge-splitting",
+ cl::desc("Split critical edges during codegen prepare"),
+ cl::init(true), cl::Hidden);
+
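
Since the new flag defaults to true, out-of-the-box behavior is unchanged;
to experiment with the non-splitting path one would presumably pass
something like 'opt -codegenprepare -cgp-critical-edge-splitting=false'
(the invocation shape is assumed here, not taken from the patch).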
namespace {
class CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
@@ -54,7 +60,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
- : FunctionPass(&ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {}
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -82,8 +88,8 @@ namespace {
}
char CodeGenPrepare::ID = 0;
-static RegisterPass<CodeGenPrepare> X("codegenprepare",
- "Optimize for code generation");
+INITIALIZE_PASS(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false);
FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
return new CodeGenPrepare(TLI);
@@ -427,9 +433,9 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
// If these values will be promoted, find out what they will be promoted
// to. This helps us consider truncates on PPC as noop copies when they
// are.
- if (TLI.getTypeAction(CI->getContext(), SrcVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(SrcVT) == TargetLowering::Promote)
SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
- if (TLI.getTypeAction(CI->getContext(), DstVT) == TargetLowering::Promote)
+ if (TLI.getTypeAction(DstVT) == TargetLowering::Promote)
DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
// If, after promotion, these are the same types, this is a noop copy.
@@ -548,9 +554,9 @@ protected:
CI->eraseFromParent();
}
bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp
- - CallInst::ArgOffset)))
- return SizeCI->isAllOnesValue();
+ if (ConstantInt *SizeCI =
+ dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
+ return SizeCI->isAllOnesValue();
return false;
}
};
@@ -891,12 +897,14 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
bool MadeChange = false;
// Split all critical edges where the dest block has a PHI.
- TerminatorInst *BBTI = BB.getTerminator();
- if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
- for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
- BasicBlock *SuccBB = BBTI->getSuccessor(i);
- if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
- SplitEdgeNicely(BBTI, i, BackEdges, this);
+ if (CriticalEdgeSplit) {
+ TerminatorInst *BBTI = BB.getTerminator();
+ if (BBTI->getNumSuccessors() > 1 && !isa<IndirectBrInst>(BBTI)) {
+ for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *SuccBB = BBTI->getSuccessor(i);
+ if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+ SplitEdgeNicely(BBTI, i, BackEdges, this);
+ }
}
}
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index ea208135739d5..a0ea369d0cadd 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -34,7 +34,7 @@ STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
struct ConstantPropagation : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- ConstantPropagation() : FunctionPass(&ID) {}
+ ConstantPropagation() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
@@ -45,8 +45,8 @@ namespace {
}
char ConstantPropagation::ID = 0;
-static RegisterPass<ConstantPropagation>
-X("constprop", "Simple constant propagation");
+INITIALIZE_PASS(ConstantPropagation, "constprop",
+ "Simple constant propagation", false, false);
FunctionPass *llvm::createConstantPropagationPass() {
return new ConstantPropagation();
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
new file mode 100644
index 0000000000000..0d4e45de34664
--- /dev/null
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -0,0 +1,200 @@
+//===- CorrelatedValuePropagation.cpp - Propagate CFG-derived info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Correlated Value Propagation pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "correlated-value-propagation"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPhis, "Number of phis propagated");
+STATISTIC(NumSelects, "Number of selects propagated");
+STATISTIC(NumMemAccess, "Number of memory access targets propagated");
+STATISTIC(NumCmps, "Number of comparisons propagated");
+
+namespace {
+ class CorrelatedValuePropagation : public FunctionPass {
+ LazyValueInfo *LVI;
+
+ bool processSelect(SelectInst *SI);
+ bool processPHI(PHINode *P);
+ bool processMemAccess(Instruction *I);
+ bool processCmp(CmpInst *C);
+
+ public:
+ static char ID;
+ CorrelatedValuePropagation(): FunctionPass(ID) { }
+
+ bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LazyValueInfo>();
+ }
+ };
+}
+
+char CorrelatedValuePropagation::ID = 0;
+INITIALIZE_PASS(CorrelatedValuePropagation, "correlated-propagation",
+ "Value Propagation", false, false);
+
+// Public interface to the Value Propagation pass
+Pass *llvm::createCorrelatedValuePropagationPass() {
+ return new CorrelatedValuePropagation();
+}
+
+bool CorrelatedValuePropagation::processSelect(SelectInst *S) {
+ if (S->getType()->isVectorTy()) return false;
+ if (isa<Constant>(S->getOperand(0))) return false;
+
+ Constant *C = LVI->getConstant(S->getOperand(0), S->getParent());
+ if (!C) return false;
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI) return false;
+
+ S->replaceAllUsesWith(S->getOperand(CI->isOne() ? 1 : 2));
+ S->eraseFromParent();
+
+ ++NumSelects;
+
+ return true;
+}
+
+bool CorrelatedValuePropagation::processPHI(PHINode *P) {
+ bool Changed = false;
+
+ BasicBlock *BB = P->getParent();
+ for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
+ Value *Incoming = P->getIncomingValue(i);
+ if (isa<Constant>(Incoming)) continue;
+
+ Constant *C = LVI->getConstantOnEdge(P->getIncomingValue(i),
+ P->getIncomingBlock(i),
+ BB);
+ if (!C) continue;
+
+ P->setIncomingValue(i, C);
+ Changed = true;
+ }
+
+ if (Value *ConstVal = P->hasConstantValue()) {
+ P->replaceAllUsesWith(ConstVal);
+ P->eraseFromParent();
+ Changed = true;
+ }
+
+ ++NumPhis;
+
+ return Changed;
+}
+
+bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
+ Value *Pointer = 0;
+ if (LoadInst *L = dyn_cast<LoadInst>(I))
+ Pointer = L->getPointerOperand();
+ else
+ Pointer = cast<StoreInst>(I)->getPointerOperand();
+
+ if (isa<Constant>(Pointer)) return false;
+
+ Constant *C = LVI->getConstant(Pointer, I->getParent());
+ if (!C) return false;
+
+ ++NumMemAccess;
+ I->replaceUsesOfWith(Pointer, C);
+ return true;
+}
+
+/// processCmp - If the value of this comparison could be determined locally,
+/// constant propagation would already have figured it out. Instead, walk
+/// the predecessors and statically evaluate the comparison based on information
+/// available on that edge. If a given static evaluation is true on ALL
+/// incoming edges, then it's true universally and we can simplify the compare.
+bool CorrelatedValuePropagation::processCmp(CmpInst *C) {
+ Value *Op0 = C->getOperand(0);
+ if (isa<Instruction>(Op0) &&
+ cast<Instruction>(Op0)->getParent() == C->getParent())
+ return false;
+
+ Constant *Op1 = dyn_cast<Constant>(C->getOperand(1));
+ if (!Op1) return false;
+
+ pred_iterator PI = pred_begin(C->getParent()), PE = pred_end(C->getParent());
+ if (PI == PE) return false;
+
+ LazyValueInfo::Tristate Result = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Result == LazyValueInfo::Unknown) return false;
+
+ ++PI;
+ while (PI != PE) {
+ LazyValueInfo::Tristate Res = LVI->getPredicateOnEdge(C->getPredicate(),
+ C->getOperand(0), Op1, *PI, C->getParent());
+ if (Res != Result) return false;
+ ++PI;
+ }
+
+ ++NumCmps;
+
+ if (Result == LazyValueInfo::True)
+ C->replaceAllUsesWith(ConstantInt::getTrue(C->getContext()));
+ else
+ C->replaceAllUsesWith(ConstantInt::getFalse(C->getContext()));
+
+ C->eraseFromParent();
+
+ return true;
+}
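
A hand-written illustration of the case processCmp handles (the function
and label names are invented for this note): the block containing the
compare has two predecessors, and x != 0 holds on both incoming edges, so
getPredicateOnEdge answers True for each edge and the compare folds to the
constant true.

int cvp_demo(int x) {
  if (x > 0) goto merge;
  if (x < 0) goto merge;
  return 0;                  // the x == 0 path never reaches the compare
merge:
  return (x != 0) ? 1 : -1;  // true on every edge into 'merge'
}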
+
+bool CorrelatedValuePropagation::runOnFunction(Function &F) {
+ LVI = &getAnalysis<LazyValueInfo>();
+
+ bool FnChanged = false;
+
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ bool BBChanged = false;
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) {
+ Instruction *II = BI++;
+ switch (II->getOpcode()) {
+ case Instruction::Select:
+ BBChanged |= processSelect(cast<SelectInst>(II));
+ break;
+ case Instruction::PHI:
+ BBChanged |= processPHI(cast<PHINode>(II));
+ break;
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ BBChanged |= processCmp(cast<CmpInst>(II));
+ break;
+ case Instruction::Load:
+ case Instruction::Store:
+ BBChanged |= processMemAccess(II);
+ break;
+ }
+ }
+
+ // Propagating correlated values might leave cruft around.
+ // Try to clean it up before we continue.
+ if (BBChanged)
+ SimplifyInstructionsInBlock(FI);
+
+ FnChanged |= BBChanged;
+ }
+
+ return FnChanged;
+}
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 39940c35da5d5..87ea8038356ae 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -35,7 +35,7 @@ namespace {
//
struct DeadInstElimination : public BasicBlockPass {
static char ID; // Pass identification, replacement for typeid
- DeadInstElimination() : BasicBlockPass(&ID) {}
+ DeadInstElimination() : BasicBlockPass(ID) {}
virtual bool runOnBasicBlock(BasicBlock &BB) {
bool Changed = false;
for (BasicBlock::iterator DI = BB.begin(); DI != BB.end(); ) {
@@ -56,8 +56,8 @@ namespace {
}
char DeadInstElimination::ID = 0;
-static RegisterPass<DeadInstElimination>
-X("die", "Dead Instruction Elimination");
+INITIALIZE_PASS(DeadInstElimination, "die",
+ "Dead Instruction Elimination", false, false);
Pass *llvm::createDeadInstEliminationPass() {
return new DeadInstElimination();
@@ -70,7 +70,7 @@ namespace {
//
struct DCE : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- DCE() : FunctionPass(&ID) {}
+ DCE() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -81,7 +81,7 @@ namespace {
}
char DCE::ID = 0;
-static RegisterPass<DCE> Y("dce", "Dead Code Elimination");
+INITIALIZE_PASS(DCE, "dce", "Dead Code Elimination", false, false);
bool DCE::runOnFunction(Function &F) {
// Start out with all of the instructions in the worklist...
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index e047e4ffa151c..c8fd9d9fa5561 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -40,7 +40,7 @@ namespace {
TargetData *TD;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(&ID) {}
+ DSE() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) {
bool Changed = false;
@@ -82,7 +82,7 @@ namespace {
}
char DSE::ID = 0;
-static RegisterPass<DSE> X("dse", "Dead Store Elimination");
+INITIALIZE_PASS(DSE, "dse", "Dead Store Elimination", false, false);
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
@@ -401,10 +401,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
}
continue;
- } else if (CallSite::get(BBI).getInstruction() != 0) {
+ } else if (CallSite CS = cast<Value>(BBI)) {
// If this call does not access memory, it can't
// be undeadifying any of our pointers.
- CallSite CS = CallSite::get(BBI);
if (AA.doesNotAccessMemory(CS))
continue;
diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp
index 610a41dae44b1..53dd06d24bb5b 100644
--- a/lib/Transforms/Scalar/GEPSplitter.cpp
+++ b/lib/Transforms/Scalar/GEPSplitter.cpp
@@ -27,13 +27,13 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
public:
static char ID; // Pass identification, replacement for typeid
- explicit GEPSplitter() : FunctionPass(&ID) {}
+ explicit GEPSplitter() : FunctionPass(ID) {}
};
}
char GEPSplitter::ID = 0;
-static RegisterPass<GEPSplitter> X("split-geps",
- "split complex GEPs into simple GEPs");
+INITIALIZE_PASS(GEPSplitter, "split-geps",
+ "split complex GEPs into simple GEPs", false, false);
FunctionPass *llvm::createGEPSplitterPass() {
return new GEPSplitter();
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 88b67768fa5df..c62ce1f27f647 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -165,7 +165,6 @@ namespace {
Expression create_expression(CastInst* C);
Expression create_expression(GetElementPtrInst* G);
Expression create_expression(CallInst* C);
- Expression create_expression(Constant* C);
Expression create_expression(ExtractValueInst* C);
Expression create_expression(InsertValueInst* C);
@@ -665,7 +664,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(&ID), NoLoads(noloads), MD(0) { }
+ : FunctionPass(ID), NoLoads(noloads), MD(0) { }
private:
bool NoLoads;
@@ -716,8 +715,7 @@ FunctionPass *llvm::createGVNPass(bool NoLoads) {
return new GVN(NoLoads);
}
-static RegisterPass<GVN> X("gvn",
- "Global Value Numbering");
+INITIALIZE_PASS(GVN, "gvn", "Global Value Numbering", false, false);
void GVN::dump(DenseMap<uint32_t, Value*>& d) {
errs() << "{\n";
@@ -735,7 +733,7 @@ static bool isSafeReplacement(PHINode* p, Instruction *inst) {
for (Instruction::use_iterator UI = p->use_begin(), E = p->use_end();
UI != E; ++UI)
- if (PHINode* use_phi = dyn_cast<PHINode>(UI))
+ if (PHINode* use_phi = dyn_cast<PHINode>(*UI))
if (use_phi->getParent() == inst->getParent())
return false;
@@ -1312,7 +1310,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
// Otherwise, we have to construct SSA form.
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSAUpdate(&NewPHIs);
- SSAUpdate.Initialize(LI);
+ SSAUpdate.Initialize(LI->getType(), LI->getName());
const Type *LoadTy = LI->getType();
@@ -2112,6 +2110,11 @@ bool GVN::performPRE(Function &F) {
CurInst->mayReadFromMemory() || CurInst->mayHaveSideEffects() ||
isa<DbgInfoIntrinsic>(CurInst))
continue;
+
+ // We don't currently value number ANY inline asm calls.
+ if (CallInst *CallI = dyn_cast<CallInst>(CurInst))
+ if (CallI->isInlineAsm())
+ continue;
uint32_t ValNo = VN.lookup(CurInst);
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index b5c9dd881df86..af2eafc47cbf3 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -77,7 +77,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(&ID) {}
+ IndVarSimplify() : LoopPass(ID) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -102,7 +102,7 @@ namespace {
void RewriteNonIntegerIVs(Loop *L);
ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
- Value *IndVar,
+ PHINode *IndVar,
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter);
@@ -117,8 +117,8 @@ namespace {
}
char IndVarSimplify::ID = 0;
-static RegisterPass<IndVarSimplify>
-X("indvars", "Canonicalize Induction Variables");
+INITIALIZE_PASS(IndVarSimplify, "indvars",
+ "Canonicalize Induction Variables", false, false);
Pass *llvm::createIndVarSimplifyPass() {
return new IndVarSimplify();
@@ -131,7 +131,7 @@ Pass *llvm::createIndVarSimplifyPass() {
/// is actually a much broader range than just linear tests.
ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
const SCEV *BackedgeTakenCount,
- Value *IndVar,
+ PHINode *IndVar,
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter) {
@@ -181,7 +181,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
// The BackedgeTaken expression contains the number of times that the
// backedge branches to the loop header. This is one less than the
// number of times the loop executes, so use the incremented indvar.
- CmpIndVar = L->getCanonicalInductionVariableIncrement();
+ CmpIndVar = IndVar->getIncomingValueForBlock(ExitingBlock);
} else {
// We have to use the preincremented value...
RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
@@ -534,7 +534,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// Now that we know the largest of the induction variable expressions
// in this loop, insert a canonical induction variable of the largest size.
- Value *IndVar = 0;
+ PHINode *IndVar = 0;
if (NeedCannIV) {
// Check to see if the loop already has any canonical-looking induction
// variables. If any are present and wider than the planned canonical
@@ -862,9 +862,9 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// Check Incr uses. One user is PN and the other user is an exit condition
// used by the conditional terminator.
Value::use_iterator IncrUse = Incr->use_begin();
- Instruction *U1 = cast<Instruction>(IncrUse++);
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
if (IncrUse == Incr->use_end()) return;
- Instruction *U2 = cast<Instruction>(IncrUse++);
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
if (IncrUse != Incr->use_end()) return;
// Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index edce14cd92eaf..104d5aecbdd32 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -24,6 +24,7 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -45,7 +46,10 @@ Threshold("jump-threading-threshold",
// Turn on use of LazyValueInfo.
static cl::opt<bool>
-EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
+EnableLVI("enable-jump-threading-lvi",
+ cl::desc("Use LVI for jump threading"),
+ cl::init(true),
+ cl::ReallyHidden);
@@ -74,15 +78,32 @@ namespace {
#else
SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
#endif
+ DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
+
+ // RAII helper for updating the recursion stack.
+ struct RecursionSetRemover {
+ DenseSet<std::pair<Value*, BasicBlock*> > &TheSet;
+ std::pair<Value*, BasicBlock*> ThePair;
+
+ RecursionSetRemover(DenseSet<std::pair<Value*, BasicBlock*> > &S,
+ std::pair<Value*, BasicBlock*> P)
+ : TheSet(S), ThePair(P) { }
+
+ ~RecursionSetRemover() {
+ TheSet.erase(ThePair);
+ }
+ };
public:
static char ID; // Pass identification
- JumpThreading() : FunctionPass(&ID) {}
+ JumpThreading() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- if (EnableLVI)
+ if (EnableLVI) {
AU.addRequired<LazyValueInfo>();
+ AU.addPreserved<LazyValueInfo>();
+ }
}
void FindLoopHeaders(Function &F);
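RecursionSetRemover is a plain RAII guard: whichever return path the recursion takes, the (value, block) pair leaves the set. The same pattern in self-contained form, with std::set standing in for DenseSet and invented names:

    #include <set>
    #include <utility>

    typedef std::pair<int, int> Key; // Stand-in for (Value*, BasicBlock*).

    struct ScopedSetRemover {
      std::set<Key> &Set;
      Key K;
      ScopedSetRemover(std::set<Key> &S, Key P) : Set(S), K(P) {}
      ~ScopedSetRemover() { Set.erase(K); } // Runs on every exit path.
    };

    static std::set<Key> Visited;

    static bool search(int Value, int Block) {
      // A failed insert means this pair is already on the recursion stack:
      // we have looped back around a cycle, so give up on this path.
      if (!Visited.insert(Key(Value, Block)).second)
        return false;
      ScopedSetRemover Guard(Visited, Key(Value, Block));

      // ... recursive body; every return below erases the pair via Guard ...
      return true;
    }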
@@ -111,8 +132,8 @@ namespace {
}
char JumpThreading::ID = 0;
-static RegisterPass<JumpThreading>
-X("jump-threading", "Jump Threading");
+INITIALIZE_PASS(JumpThreading, "jump-threading",
+ "Jump Threading", false, false);
// Public interface to the Jump Threading pass
FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
@@ -144,6 +165,7 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << " JT: Deleting dead block '" << BB->getName()
<< "' with terminator: " << *BB->getTerminator() << '\n');
LoopHeaders.erase(BB);
+ if (LVI) LVI->eraseBlock(BB);
DeleteDeadBlock(BB);
Changed = true;
} else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
@@ -164,6 +186,11 @@ bool JumpThreading::runOnFunction(Function &F) {
bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
BasicBlock *Succ = BI->getSuccessor(0);
+ // FIXME: It is always conservatively correct to drop the info
+ // for a block even if it doesn't get erased. This isn't totally
+ // awesome, but it allows us to use AssertingVH to prevent nasty
+ // dangling pointer issues within LazyValueInfo.
+ if (LVI) LVI->eraseBlock(BB);
if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) {
Changed = true;
// If we deleted BB and BB was the header of a loop, then the
@@ -251,6 +278,17 @@ void JumpThreading::FindLoopHeaders(Function &F) {
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
+// Helper method for ComputeValueKnownInPredecessors. If Value is a
+// ConstantInt, push it. If it's an undef, push 0. Otherwise, do nothing.
+static void PushConstantIntOrUndef(SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > &Result,
+ Constant *Value, BasicBlock* BB){
+ if (ConstantInt *FoldedCInt = dyn_cast<ConstantInt>(Value))
+ Result.push_back(std::make_pair(FoldedCInt, BB));
+ else if (isa<UndefValue>(Value))
+ Result.push_back(std::make_pair((ConstantInt*)0, BB));
+}
+
/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
/// if we can infer that the value is a known ConstantInt in any of our
/// predecessors. If so, return the known list of value and pred BB in the
@@ -260,12 +298,24 @@ void JumpThreading::FindLoopHeaders(Function &F) {
///
bool JumpThreading::
ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ // This method walks up use-def chains recursively. Because of this, we could
+ // get into an infinite loop going around loops in the use-def chain. To
+ // prevent this, keep track of what (value, block) pairs we've already visited
+ // and terminate the search if we loop back to them.
+ if (!RecursionSet.insert(std::make_pair(V, BB)).second)
+ return false;
+
+ // An RAII helper to remove this pair from the recursion set once the recursion
+ // stack pops back out again.
+ RecursionSetRemover remover(RecursionSet, std::make_pair(V, BB));
+
// If V is a constantint, then it is known in all predecessors.
if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
ConstantInt *CI = dyn_cast<ConstantInt>(V);
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
Result.push_back(std::make_pair(CI, *PI));
+
return true;
}
@@ -313,8 +363,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ } else if (LVI) {
+ Constant *CI = LVI->getConstantOnEdge(InVal,
+ PN->getIncomingBlock(i), BB);
+ // LVI returns null if no value could be determined.
+ if (!CI) continue;
+ PushConstantIntOrUndef(Result, CI, PN->getIncomingBlock(i));
}
}
+
return !Result.empty();
}
@@ -338,29 +395,26 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
else
InterestingVal = ConstantInt::getFalse(I->getContext());
+ SmallPtrSet<BasicBlock*, 4> LHSKnownBBs;
+
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0) {
Result.push_back(LHSVals[i]);
Result.back().first = InterestingVal;
+ LHSKnownBBs.insert(LHSVals[i].second);
}
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
- bool HasValue = false;
- for (unsigned r = 0, e = Result.size(); r != e; ++r)
- if (Result[r].second == RHSVals[i].second) {
- HasValue = true;
- break;
- }
-
- if (!HasValue) {
+ if (!LHSKnownBBs.count(RHSVals[i].second)) {
Result.push_back(RHSVals[i]);
Result.back().first = InterestingVal;
}
}
+
return !Result.empty();
}
@@ -377,8 +431,27 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
if (Result[i].first)
Result[i].first =
cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+
return true;
}
+
+ // Try to simplify some other binary operator values.
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(BO->getOperand(0), BB, LHSVals);
+
+ // Try to use constant folding to simplify the binary operator.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first ? LHSVals[i].first :
+ cast<Constant>(UndefValue::get(BO->getType()));
+ Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
+
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+ }
+
+ return !Result.empty();
}
// Handle compare with phi operand, where the PHI is defined in this block.
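In the new BinaryOperator case above, a null ConstantInt* in LHSVals means undef on that edge, so before folding the code substitutes a real UndefValue. A minimal sketch of that convention against the 2.8-era constant APIs; the helper itself is illustrative:

    #include "llvm/Constants.h"
    #include "llvm/Instruction.h"

    using namespace llvm;

    // Fold "x + RHS" for one predecessor edge where x is either a known
    // ConstantInt or null, the code's encoding for undef on that edge.
    static Constant *foldAddOnEdge(ConstantInt *KnownLHS, ConstantInt *RHS) {
      Constant *V = KnownLHS;
      if (!V)
        V = UndefValue::get(RHS->getType());
      // ConstantExpr::get folds eagerly when both operands are constants,
      // which keeps the per-edge loop cheap.
      return ConstantExpr::get(Instruction::Add, V, RHS);
    }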
@@ -405,10 +478,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
}
- if (isa<UndefValue>(Res))
- Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res))
- Result.push_back(std::make_pair(CI, PredBB));
+ if (Constant *ConstRes = dyn_cast<Constant>(Res))
+ PushConstantIntOrUndef(Result, ConstRes, PredBB);
}
return !Result.empty();
@@ -418,28 +489,59 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
// If comparing a live-in value against a constant, see if we know the
// live-in value on any predecessors.
if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
- Cmp->getType()->isIntegerTy() && // Not vector compare.
- (!isa<Instruction>(Cmp->getOperand(0)) ||
- cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
- Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+ Cmp->getType()->isIntegerTy()) {
+ if (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate Res =
+ LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, P, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
- // If the value is known by LazyValueInfo to be a constant in a
- // predecessor, use that information to try to thread this block.
- LazyValueInfo::Tristate
- Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
- RHSCst, P, BB);
- if (Res == LazyValueInfo::Unknown)
- continue;
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ }
- Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
+ return !Result.empty();
}
-
- return !Result.empty();
+
+ // Try to find a constant value for the LHS of a comparison,
+ // and evaluate it statically if we can.
+ if (Constant *CmpConst = dyn_cast<Constant>(Cmp->getOperand(1))) {
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals;
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
+ Constant *V = LHSVals[i].first ? LHSVals[i].first :
+ cast<Constant>(UndefValue::get(CmpConst->getType()));
+ Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
+ V, CmpConst);
+ PushConstantIntOrUndef(Result, Folded, LHSVals[i].second);
+ }
+
+ return !Result.empty();
+ }
+ }
+ }
+
+ if (LVI) {
+ // If all else fails, see if LVI can figure out a constant value for us.
+ Constant *CI = LVI->getConstant(V, BB);
+ ConstantInt *CInt = dyn_cast_or_null<ConstantInt>(CI);
+ if (CInt) {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CInt, *PI));
}
+
+ return !Result.empty();
}
+
return false;
}
@@ -490,6 +592,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// Remember if SinglePred was the entry block of the function. If so, we
// will need to move BB back to the entry position.
bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
+ if (LVI) LVI->eraseBlock(SinglePred);
MergeBasicBlockIntoOnlyPred(BB);
if (isEntry && BB != &BB->getParent()->getEntryBlock())
@@ -603,6 +706,44 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
}
+
+ // For a comparison where the LHS is outside this block, it's possible
+ // that we've branched on it before. Use LVI to see if we can simplify
+ // the branch based on that.
+ BranchInst *CondBr = dyn_cast<BranchInst>(BB->getTerminator());
+ Constant *CondConst = dyn_cast<Constant>(CondCmp->getOperand(1));
+ pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ if (LVI && CondBr && CondConst && CondBr->isConditional() && PI != PE &&
+ (!isa<Instruction>(CondCmp->getOperand(0)) ||
+ cast<Instruction>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // For each predecessor edge, determine if the comparison is true or false
+ // on that edge. If they're all true or all false, we can simplify the
+ // branch.
+ // FIXME: We could handle mixed true/false by duplicating code.
+ LazyValueInfo::Tristate Baseline =
+ LVI->getPredicateOnEdge(CondCmp->getPredicate(), CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Baseline != LazyValueInfo::Unknown) {
+ // Check that all remaining incoming values match the first one.
+ while (++PI != PE) {
+ LazyValueInfo::Tristate Ret = LVI->getPredicateOnEdge(
+ CondCmp->getPredicate(),
+ CondCmp->getOperand(0),
+ CondConst, *PI, BB);
+ if (Ret != Baseline) break;
+ }
+
+ // If we terminated early, then one of the values didn't match.
+ if (PI == PE) {
+ unsigned ToRemove = Baseline == LazyValueInfo::True ? 1 : 0;
+ unsigned ToKeep = Baseline == LazyValueInfo::True ? 0 : 1;
+ RemovePredecessorAndSimplify(CondBr->getSuccessor(ToRemove), BB, TD);
+ BranchInst::Create(CondBr->getSuccessor(ToKeep), CondBr);
+ CondBr->eraseFromParent();
+ return true;
+ }
+ }
+ }
}
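The new LVI block above folds the branch only when every predecessor edge produces the same known verdict. Stripped of the LLVM types, the check has this self-contained shape (Tristate and the verdict list are stand-ins for the LazyValueInfo queries):

    #include <cstddef>
    #include <vector>

    enum Tristate { False = 0, True = 1, Unknown = -1 };

    // Returns True or False if every predecessor edge agrees on the branch
    // condition, Unknown otherwise (in which case the branch is left alone).
    static Tristate commonVerdict(const std::vector<Tristate> &EdgeVerdicts) {
      if (EdgeVerdicts.empty())
        return Unknown;
      Tristate Baseline = EdgeVerdicts[0];
      if (Baseline == Unknown)
        return Unknown;
      for (std::size_t i = 1, e = EdgeVerdicts.size(); i != e; ++i)
        if (EdgeVerdicts[i] != Baseline)
          return Unknown; // One edge disagrees; bail out early.
      return Baseline;    // Safe to replace the conditional branch.
    }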
// Check for some cases that are worth simplifying. Right now we want to look
@@ -1020,6 +1161,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
return false;
+
assert(!PredValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
@@ -1314,6 +1456,9 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
<< ", across block:\n "
<< *BB << "\n");
+ if (LVI)
+ LVI->threadEdge(PredBB, BB, SuccBB);
+
// We are going to have to map operands from the original BB block to the new
// copy of the block 'NewBB'. If there are PHI nodes in BB, evaluate them to
// account for entry from PredBB.
@@ -1383,7 +1528,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I);
+ SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(NewBB, ValueMapping[I]);
@@ -1538,7 +1683,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I);
+ SSAUpdate.Initialize(I->getType(), I->getName());
SSAUpdate.AddAvailableValue(BB, I);
SSAUpdate.AddAvailableValue(PredBB, ValueMapping[I]);
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 73473952912ee..2ef85446bd9ba 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -26,8 +26,7 @@
// pointer. There are no calls in the loop which mod/ref the pointer.
// If these conditions are true, we can promote the loads and stores in the
// loop of the pointer to use a temporary alloca'd variable. We then use
-// the mem2reg functionality to construct the appropriate SSA form for the
-// variable.
+// the SSAUpdater to construct the appropriate SSA form for the value.
//
//===----------------------------------------------------------------------===//
@@ -37,14 +36,15 @@
#include "llvm/DerivedTypes.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Instructions.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -66,7 +66,7 @@ DisablePromotion("disable-licm-promotion", cl::Hidden,
namespace {
struct LICM : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LICM() : LoopPass(&ID) {}
+ LICM() : LoopPass(ID) {}
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -75,39 +75,31 @@ namespace {
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
AU.addRequired<DominatorTree>();
- AU.addRequired<DominanceFrontier>(); // For scalar promotion (mem2reg)
+ AU.addRequired<LoopInfo>();
+ AU.addRequiredID(LoopSimplifyID);
AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
- AU.addPreserved<DominanceFrontier>();
AU.addPreservedID(LoopSimplifyID);
}
bool doFinalization() {
- // Free the values stored in the map
- for (std::map<Loop *, AliasSetTracker *>::iterator
- I = LoopToAliasMap.begin(), E = LoopToAliasMap.end(); I != E; ++I)
- delete I->second;
-
- LoopToAliasMap.clear();
+ assert(LoopToAliasSetMap.empty() && "Didn't free loop alias sets");
return false;
}
private:
- // Various analyses that we use...
AliasAnalysis *AA; // Current AliasAnalysis information
LoopInfo *LI; // Current LoopInfo
- DominatorTree *DT; // Dominator Tree for the current Loop...
- DominanceFrontier *DF; // Current Dominance Frontier
+ DominatorTree *DT; // Dominator Tree for the current Loop.
- // State that is updated as we process loops
+ // State that is updated as we process loops.
bool Changed; // Set to true when we change anything.
BasicBlock *Preheader; // The preheader block of the current loop...
Loop *CurLoop; // The current loop we are working on...
AliasSetTracker *CurAST; // AliasSet information for the current loop...
- std::map<Loop *, AliasSetTracker *> LoopToAliasMap;
+ DenseMap<Loop*, AliasSetTracker*> LoopToAliasSetMap;
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
void cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L);
@@ -204,25 +196,12 @@ namespace {
bool isLoopInvariantInst(Instruction &I);
bool isNotUsedInLoop(Instruction &I);
- /// PromoteValuesInLoop - Look at the stores in the loop and promote as many
- /// to scalars as we can.
- ///
- void PromoteValuesInLoop();
-
- /// FindPromotableValuesInLoop - Check the current loop for stores to
- /// definite pointers, which are not loaded and stored through may aliases.
- /// If these are found, create an alloca for the value, add it to the
- /// PromotedValues list, and keep track of the mapping from value to
- /// alloca...
- ///
- void FindPromotableValuesInLoop(
- std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
- std::map<Value*, AllocaInst*> &Val2AlMap);
+ void PromoteAliasSet(AliasSet &AS);
};
}
char LICM::ID = 0;
-static RegisterPass<LICM> X("licm", "Loop Invariant Code Motion");
+INITIALIZE_PASS(LICM, "licm", "Loop Invariant Code Motion", false, false);
Pass *llvm::createLICMPass() { return new LICM(); }
@@ -236,19 +215,23 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get our Loop and Alias Analysis information...
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
- DF = &getAnalysis<DominanceFrontier>();
DT = &getAnalysis<DominatorTree>();
CurAST = new AliasSetTracker(*AA);
- // Collect Alias info from subloops
+ // Collect Alias info from subloops.
for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
LoopItr != LoopItrE; ++LoopItr) {
Loop *InnerL = *LoopItr;
- AliasSetTracker *InnerAST = LoopToAliasMap[InnerL];
- assert (InnerAST && "Where is my AST?");
+ AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
+ assert(InnerAST && "Where is my AST?");
// What if InnerLoop was modified by other passes?
CurAST->add(*InnerAST);
+
+ // Once we've incorporated the inner loop's AST into ours, we don't need the
+ // subloop's anymore.
+ delete InnerAST;
+ LoopToAliasSetMap.erase(InnerL);
}
CurLoop = L;
@@ -263,7 +246,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops...
+ if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block
}
@@ -283,15 +266,24 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
HoistRegion(DT->getNode(L->getHeader()));
// Now that all loop invariants have been removed from the loop, promote any
- // memory references to scalars that we can...
- if (!DisablePromotion && Preheader && L->hasDedicatedExits())
- PromoteValuesInLoop();
-
+ // memory references to scalars that we can.
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
+ I != E; ++I)
+ PromoteAliasSet(*I);
+ }
+
// Clear out loops state information for the next iteration
CurLoop = 0;
Preheader = 0;
- LoopToAliasMap[L] = CurAST;
+ // If this loop is nested inside of another one, save the alias information
+ // for when we process the outer loop.
+ if (L->getParentLoop())
+ LoopToAliasSetMap[L] = CurAST;
+ else
+ delete CurAST;
return Changed;
}
@@ -308,7 +300,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
// If this subregion is not in the top level loop at all, exit.
if (!CurLoop->contains(BB)) return;
- // We are processing blocks in reverse dfo, so process children first...
+ // We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
SinkRegion(Children[i]);
@@ -319,6 +311,17 @@ void LICM::SinkRegion(DomTreeNode *N) {
for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
Instruction &I = *--II;
+
+ // If the instruction is dead, we would try to sink it because it isn't used
+ // in the loop; instead, just delete it.
+ if (isInstructionTriviallyDead(&I)) {
+ DEBUG(dbgs() << "LICM deleting dead inst: " << I << '\n');
+ ++II;
+ CurAST->deleteValue(&I);
+ I.eraseFromParent();
+ Changed = true;
+ continue;
+ }
// Check to see if we can sink this instruction to the exit blocks
// of the loop. We can do this if all of the users of the instruction are
@@ -350,6 +353,18 @@ void LICM::HoistRegion(DomTreeNode *N) {
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ) {
Instruction &I = *II++;
+ // Try constant folding this instruction. If all the operands are
+ // constants, it is technically hoistable, but it would be better to just
+ // fold it.
+ if (Constant *C = ConstantFoldInstruction(&I)) {
+ DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C << '\n');
+ CurAST->copyValue(&I, C);
+ CurAST->deleteValue(&I);
+ I.replaceAllUsesWith(C);
+ I.eraseFromParent();
+ continue;
+ }
+
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
@@ -357,7 +372,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
if (isLoopInvariantInst(I) && canSinkOrHoistInst(I) &&
isSafeToExecuteUnconditionally(I))
hoist(I);
- }
+ }
const std::vector<DomTreeNode*> &Children = N->getChildren();
for (unsigned i = 0, e = Children.size(); i != e; ++i)
@@ -457,10 +472,10 @@ bool LICM::isLoopInvariantInst(Instruction &I) {
/// position, and may either delete it or move it to outside of the loop.
///
void LICM::sink(Instruction &I) {
- DEBUG(dbgs() << "LICM sinking instruction: " << I);
+ DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
@@ -477,122 +492,101 @@ void LICM::sink(Instruction &I) {
// If I has users in unreachable blocks, eliminate.
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.getType()->isVoidTy())
+ if (!I.use_empty())
I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
} else {
// Move the instruction to the start of the exit block, after any PHI
// nodes in it.
- I.removeFromParent();
- BasicBlock::iterator InsertPt = ExitBlocks[0]->getFirstNonPHI();
- ExitBlocks[0]->getInstList().insert(InsertPt, &I);
+ I.moveBefore(ExitBlocks[0]->getFirstNonPHI());
+
+ // This instruction is no longer in the AST for the current loop, because
+ // we just sunk it out of the loop. If we just sunk it into an outer
+ // loop, we will rediscover the operation when we process it.
+ CurAST->deleteValue(&I);
}
- } else if (ExitBlocks.empty()) {
+ return;
+ }
+
+ if (ExitBlocks.empty()) {
// The instruction is actually dead if there ARE NO exit blocks.
CurAST->deleteValue(&I);
// If I has users in unreachable blocks, eliminate.
// If I is not void type then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
- if (!I.getType()->isVoidTy())
+ if (!I.use_empty())
I.replaceAllUsesWith(UndefValue::get(I.getType()));
I.eraseFromParent();
- } else {
- // Otherwise, if we have multiple exits, use the PromoteMem2Reg function to
- // do all of the hard work of inserting PHI nodes as necessary. We convert
- // the value into a stack object to get it to do this.
-
- // Firstly, we create a stack object to hold the value...
- AllocaInst *AI = 0;
-
- if (!I.getType()->isVoidTy()) {
- AI = new AllocaInst(I.getType(), 0, I.getName(),
- I.getParent()->getParent()->getEntryBlock().begin());
- CurAST->add(AI);
- }
-
- // Secondly, insert load instructions for each use of the instruction
- // outside of the loop.
- while (!I.use_empty()) {
- Instruction *U = cast<Instruction>(I.use_back());
-
- // If the user is a PHI Node, we actually have to insert load instructions
- // in all predecessor blocks, not in the PHI block itself!
- if (PHINode *UPN = dyn_cast<PHINode>(U)) {
- // Only insert into each predecessor once, so that we don't have
- // different incoming values from the same block!
- std::map<BasicBlock*, Value*> InsertedBlocks;
- for (unsigned i = 0, e = UPN->getNumIncomingValues(); i != e; ++i)
- if (UPN->getIncomingValue(i) == &I) {
- BasicBlock *Pred = UPN->getIncomingBlock(i);
- Value *&PredVal = InsertedBlocks[Pred];
- if (!PredVal) {
- // Insert a new load instruction right before the terminator in
- // the predecessor block.
- PredVal = new LoadInst(AI, "", Pred->getTerminator());
- CurAST->add(cast<LoadInst>(PredVal));
- }
-
- UPN->setIncomingValue(i, PredVal);
- }
-
- } else {
- LoadInst *L = new LoadInst(AI, "", U);
- U->replaceUsesOfWith(&I, L);
- CurAST->add(L);
- }
- }
-
- // Thirdly, insert a copy of the instruction in each exit block of the loop
- // that is dominated by the instruction, storing the result into the memory
- // location. Be careful not to insert the instruction into any particular
- // basic block more than once.
- std::set<BasicBlock*> InsertedBlocks;
- BasicBlock *InstOrigBB = I.getParent();
-
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *ExitBlock = ExitBlocks[i];
-
- if (isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB)) {
- // If we haven't already processed this exit block, do so now.
- if (InsertedBlocks.insert(ExitBlock).second) {
- // Insert the code after the last PHI node...
- BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
-
- // If this is the first exit block processed, just move the original
- // instruction, otherwise clone the original instruction and insert
- // the copy.
- Instruction *New;
- if (InsertedBlocks.size() == 1) {
- I.removeFromParent();
- ExitBlock->getInstList().insert(InsertPt, &I);
- New = &I;
- } else {
- New = I.clone();
- CurAST->copyValue(&I, New);
- if (!I.getName().empty())
- New->setName(I.getName()+".le");
- ExitBlock->getInstList().insert(InsertPt, New);
- }
-
- // Now that we have inserted the instruction, store it into the alloca
- if (AI) new StoreInst(New, AI, InsertPt);
- }
- }
- }
-
- // If the instruction doesn't dominate any exit blocks, it must be dead.
- if (InsertedBlocks.empty()) {
- CurAST->deleteValue(&I);
- I.eraseFromParent();
- }
-
- // Finally, promote the fine value to SSA form.
- if (AI) {
- std::vector<AllocaInst*> Allocas;
- Allocas.push_back(AI);
- PromoteMemToReg(Allocas, *DT, *DF, CurAST);
+ return;
+ }
+
+ // Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
+ // hard work of inserting PHI nodes as necessary.
+ SmallVector<PHINode*, 8> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ if (!I.use_empty())
+ SSA.Initialize(I.getType(), I.getName());
+
+ // Insert a copy of the instruction in each exit block of the loop that is
+ // dominated by the instruction. Each exit block is known to only be in the
+ // ExitBlocks list once.
+ BasicBlock *InstOrigBB = I.getParent();
+ unsigned NumInserted = 0;
+
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+
+ if (!isExitBlockDominatedByBlockInLoop(ExitBlock, InstOrigBB))
+ continue;
+
+ // Insert the code after the last PHI node.
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
+
+ // If this is the first exit block processed, just move the original
+ // instruction, otherwise clone the original instruction and insert
+ // the copy.
+ Instruction *New;
+ if (NumInserted++ == 0) {
+ I.moveBefore(InsertPt);
+ New = &I;
+ } else {
+ New = I.clone();
+ if (!I.getName().empty())
+ New->setName(I.getName()+".le");
+ ExitBlock->getInstList().insert(InsertPt, New);
}
+
+ // Now that we have inserted the instruction, inform SSAUpdater.
+ if (!I.use_empty())
+ SSA.AddAvailableValue(ExitBlock, New);
}
+
+ // If the instruction doesn't dominate any exit blocks, it must be dead.
+ if (NumInserted == 0) {
+ CurAST->deleteValue(&I);
+ if (!I.use_empty())
+ I.replaceAllUsesWith(UndefValue::get(I.getType()));
+ I.eraseFromParent();
+ return;
+ }
+
+ // Next, rewrite uses of the instruction, inserting PHI nodes as needed.
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
+ // Grab the use before incrementing the iterator.
+ Use &U = UI.getUse();
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+ SSA.RewriteUseAfterInsertions(U);
+ }
+
+ // Update CurAST for NewPHIs if I had pointer type.
+ if (I.getType()->isPointerTy())
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(&I, NewPHIs[i]);
+
+ // Finally, remove the instruction from CurAST. It is no longer in the loop.
+ CurAST->deleteValue(&I);
}
/// hoist - When an instruction is found to only use loop invariant operands
@@ -602,12 +596,8 @@ void LICM::hoist(Instruction &I) {
DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": "
<< I << "\n");
- // Remove the instruction from its current basic block... but don't delete the
- // instruction.
- I.removeFromParent();
-
- // Insert the new node in Preheader, before the terminator.
- Preheader->getInstList().insert(Preheader->getTerminator(), &I);
+ // Move the new node to the Preheader, before its terminator.
+ I.moveBefore(Preheader->getTerminator());
if (isa<LoadInst>(I)) ++NumMovedLoads;
else if (isa<CallInst>(I)) ++NumMovedCalls;
@@ -647,223 +637,269 @@ bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
return true;
}
-
-/// PromoteValuesInLoop - Try to promote memory values to scalars by sinking
+/// PromoteAliasSet - Try to promote memory values to scalars by sinking
/// stores out of the loop and moving loads to before the loop. We do this by
/// looping over the stores in the loop, looking for stores to Must pointers
-/// which are loop invariant. We promote these memory locations to use allocas
-/// instead. These allocas can easily be raised to register values by the
-/// PromoteMem2Reg functionality.
+/// which are loop invariant.
///
-void LICM::PromoteValuesInLoop() {
- // PromotedValues - List of values that are promoted out of the loop. Each
- // value has an alloca instruction for it, and a canonical version of the
- // pointer.
- std::vector<std::pair<AllocaInst*, Value*> > PromotedValues;
- std::map<Value*, AllocaInst*> ValueToAllocaMap; // Map of ptr to alloca
-
- FindPromotableValuesInLoop(PromotedValues, ValueToAllocaMap);
- if (ValueToAllocaMap.empty()) return; // If there are values to promote.
-
- Changed = true;
- NumPromoted += PromotedValues.size();
-
- std::vector<Value*> PointerValueNumbers;
-
- // Emit a copy from the value into the alloca'd value in the loop preheader
- TerminatorInst *LoopPredInst = Preheader->getTerminator();
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
- Value *Ptr = PromotedValues[i].second;
-
- // If we are promoting a pointer value, update alias information for the
- // inserted load.
- Value *LoadValue = 0;
- if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) {
- // Locate a load or store through the pointer, and assign the same value
- // to LI as we are loading or storing. Since we know that the value is
- // stored in this loop, this will always succeed.
- for (Value::use_iterator UI = Ptr->use_begin(), E = Ptr->use_end();
- UI != E; ++UI) {
- User *U = *UI;
- if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
- LoadValue = LI;
- break;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (SI->getOperand(1) == Ptr) {
- LoadValue = SI->getOperand(0);
- break;
- }
- }
- }
- assert(LoadValue && "No store through the pointer found!");
- PointerValueNumbers.push_back(LoadValue); // Remember this for later.
- }
-
- // Load from the memory we are promoting.
- LoadInst *LI = new LoadInst(Ptr, Ptr->getName()+".promoted", LoopPredInst);
-
- if (LoadValue) CurAST->copyValue(LoadValue, LI);
-
- // Store into the temporary alloca.
- new StoreInst(LI, PromotedValues[i].first, LoopPredInst);
- }
+void LICM::PromoteAliasSet(AliasSet &AS) {
+ // We can promote this alias set if it has a store, if it is a "Must" alias
+ // set, if the pointer is loop invariant, and if we are not eliminating any
+ // volatile loads or stores.
+ if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
+ AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
+ return;
+
+ assert(!AS.empty() &&
+ "Must alias set should have at least one pointer element in it!");
+ Value *SomePtr = AS.begin()->getValue();
- // Scan the basic blocks in the loop, replacing uses of our pointers with
- // uses of the allocas in question.
+ // It isn't safe to promote a load/store from the loop if the load/store is
+ // conditional. For example, turning:
//
- for (Loop::block_iterator I = CurLoop->block_begin(),
- E = CurLoop->block_end(); I != E; ++I) {
- BasicBlock *BB = *I;
- // Rewrite all loads and stores in the block of the pointer...
- for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
- if (LoadInst *L = dyn_cast<LoadInst>(II)) {
- std::map<Value*, AllocaInst*>::iterator
- I = ValueToAllocaMap.find(L->getOperand(0));
- if (I != ValueToAllocaMap.end())
- L->setOperand(0, I->second); // Rewrite load instruction...
- } else if (StoreInst *S = dyn_cast<StoreInst>(II)) {
- std::map<Value*, AllocaInst*>::iterator
- I = ValueToAllocaMap.find(S->getOperand(1));
- if (I != ValueToAllocaMap.end())
- S->setOperand(1, I->second); // Rewrite store instruction...
- }
- }
- }
-
- // Now that the body of the loop uses the allocas instead of the original
- // memory locations, insert code to copy the alloca value back into the
- // original memory location on all exits from the loop. Note that we only
- // want to insert one copy of the code in each exit block, though the loop may
- // exit to the same block more than once.
+ // for () { if (c) *P += 1; }
//
- SmallPtrSet<BasicBlock*, 16> ProcessedBlocks;
-
- SmallVector<BasicBlock*, 8> ExitBlocks;
- CurLoop->getExitBlocks(ExitBlocks);
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- if (!ProcessedBlocks.insert(ExitBlocks[i]))
- continue;
-
- // Copy all of the allocas into their memory locations.
- BasicBlock::iterator BI = ExitBlocks[i]->getFirstNonPHI();
- Instruction *InsertPos = BI;
- unsigned PVN = 0;
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) {
- // Load from the alloca.
- LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
-
- // If this is a pointer type, update alias info appropriately.
- if (LI->getType()->isPointerTy())
- CurAST->copyValue(PointerValueNumbers[PVN++], LI);
-
- // Store into the memory we promoted.
- new StoreInst(LI, PromotedValues[i].second, InsertPos);
- }
- }
-
- // Now that we have done the deed, use the mem2reg functionality to promote
- // all of the new allocas we just created into real SSA registers.
+ // into:
//
- std::vector<AllocaInst*> PromotedAllocas;
- PromotedAllocas.reserve(PromotedValues.size());
- for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i)
- PromotedAllocas.push_back(PromotedValues[i].first);
- PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST);
-}
-
-/// FindPromotableValuesInLoop - Check the current loop for stores to definite
-/// pointers, which are not loaded and stored through may aliases and are safe
-/// for promotion. If these are found, create an alloca for the value, add it
-/// to the PromotedValues list, and keep track of the mapping from value to
-/// alloca.
-void LICM::FindPromotableValuesInLoop(
- std::vector<std::pair<AllocaInst*, Value*> > &PromotedValues,
- std::map<Value*, AllocaInst*> &ValueToAllocaMap) {
- Instruction *FnStart = CurLoop->getHeader()->getParent()->begin()->begin();
-
- // Loop over all of the alias sets in the tracker object.
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I) {
- AliasSet &AS = *I;
- // We can promote this alias set if it has a store, if it is a "Must" alias
- // set, if the pointer is loop invariant, and if we are not eliminating any
- // volatile loads or stores.
- if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
- AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
- continue;
+ // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
+ //
+ // is not safe, because *P may only be valid to access if 'c' is true.
+ //
+ // It is safe to promote P if all uses are direct load/stores and if at
+ // least one is guaranteed to be executed.
+ bool GuaranteedToExecute = false;
+
+ SmallVector<Instruction*, 64> LoopUses;
+ SmallPtrSet<Value*, 4> PointerMustAliases;
+
+ // Check that all of the pointers in the alias set have the same type. We
+ // cannot (yet) promote a memory location that is loaded and stored in
+ // different sizes.
+ for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+ Value *ASIV = ASI->getValue();
+ PointerMustAliases.insert(ASIV);
- assert(!AS.empty() &&
- "Must alias set should have at least one pointer element in it!");
- Value *V = AS.begin()->getValue();
-
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
- {
- bool PointerOk = true;
- for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
- if (V->getType() != I->getValue()->getType()) {
- PointerOk = false;
- break;
- }
- if (!PointerOk)
- continue;
- }
-
- // It isn't safe to promote a load/store from the loop if the load/store is
- // conditional. For example, turning:
- //
- // for () { if (c) *P += 1; }
- //
- // into:
- //
- // tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
- //
- // is not safe, because *P may only be valid to access if 'c' is true.
- //
- // It is safe to promote P if all uses are direct load/stores and if at
- // least one is guaranteed to be executed.
- bool GuaranteedToExecute = false;
- bool InvalidInst = false;
- for (Value::use_iterator UI = V->use_begin(), UE = V->use_end();
+ if (SomePtr->getType() != ASIV->getType())
+ return;
+
+ for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
UI != UE; ++UI) {
- // Ignore instructions not in this loop.
+ // Ignore instructions that are outside the loop.
Instruction *Use = dyn_cast<Instruction>(*UI);
if (!Use || !CurLoop->contains(Use))
continue;
-
- if (!isa<LoadInst>(Use) && !isa<StoreInst>(Use)) {
- InvalidInst = true;
- break;
- }
+
+ // If there is a non-load/store instruction in the loop, we can't promote
+ // it.
+ if (isa<LoadInst>(Use))
+ assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
+ else if (isa<StoreInst>(Use)) {
+ assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
+ if (Use->getOperand(0) == ASIV) return;
+ } else
+ return; // Not a load or store.
if (!GuaranteedToExecute)
GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
+
+ LoopUses.push_back(Use);
}
+ }
+
+ // If there isn't a guaranteed-to-execute instruction, we can't promote.
+ if (!GuaranteedToExecute)
+ return;
+
+ // Otherwise, this is safe to promote, let's do it!
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ Changed = true;
+ ++NumPromoted;
- // If there is an non-load/store instruction in the loop, we can't promote
- // it. If there isn't a guaranteed-to-execute instruction, we can't
- // promote.
- if (InvalidInst || !GuaranteedToExecute)
+ // We use the SSAUpdater interface to insert phi nodes as required.
+ SmallVector<PHINode*, 16> NewPHIs;
+ SSAUpdater SSA(&NewPHIs);
+
+ // It wants to know some value of the same type as what we'll be inserting.
+ Value *SomeValue;
+ if (isa<LoadInst>(LoopUses[0]))
+ SomeValue = LoopUses[0];
+ else
+ SomeValue = cast<StoreInst>(LoopUses[0])->getOperand(0);
+ SSA.Initialize(SomeValue->getType(), SomeValue->getName());
+
+ // First step: bucket up uses of the pointers by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ // FIXME: Want a TinyVector<Instruction*> since there is usually 0/1 element.
+ DenseMap<BasicBlock*, std::vector<Instruction*> > UsesByBlock;
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+ UsesByBlock[User->getParent()].push_back(User);
+ }
+
+ // Okay, now we can iterate over all the blocks in the loop with uses,
+ // processing them. Keep track of which loads are loading a live-in value.
+ SmallVector<LoadInst*, 32> LiveInLoads;
+ DenseMap<Value*, Value*> ReplacedLoads;
+
+ for (unsigned LoopUse = 0, e = LoopUses.size(); LoopUse != e; ++LoopUse) {
+ Instruction *User = LoopUses[LoopUse];
+ std::vector<Instruction*> &BlockUses = UsesByBlock[User->getParent()];
+
+ // If this block has already been processed, ignore this repeat use.
+ if (BlockUses.empty()) continue;
+
+ // Okay, this is the first use in the block. If this block just has a
+ // single user in it, we can rewrite it trivially.
+ if (BlockUses.size() == 1) {
+ // If it is a store, it is a trivial def of the value in the block.
+ if (isa<StoreInst>(User)) {
+ SSA.AddAvailableValue(User->getParent(),
+ cast<StoreInst>(User)->getOperand(0));
+ } else {
+ // Otherwise it is a load, queue it to rewrite as a live-in load.
+ LiveInLoads.push_back(cast<LoadInst>(User));
+ }
+ BlockUses.clear();
continue;
+ }
- const Type *Ty = cast<PointerType>(V->getType())->getElementType();
- AllocaInst *AI = new AllocaInst(Ty, 0, V->getName()+".tmp", FnStart);
- PromotedValues.push_back(std::make_pair(AI, V));
+ // Otherwise, check to see if this block is all loads. If so, we can queue
+ // them all as live-in loads.
+ bool HasStore = false;
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i) {
+ if (isa<StoreInst>(BlockUses[i])) {
+ HasStore = true;
+ break;
+ }
+ }
+
+ if (!HasStore) {
+ for (unsigned i = 0, e = BlockUses.size(); i != e; ++i)
+ LiveInLoads.push_back(cast<LoadInst>(BlockUses[i]));
+ BlockUses.clear();
+ continue;
+ }
- // Update the AST and alias analysis.
- CurAST->copyValue(V, AI);
+ // Otherwise, we have mixed loads and stores (or just a bunch of stores).
+ // Since SSAUpdater is purely for cross-block values, we need to determine
+ // the order of these instructions in the block. If the first use in the
+ // block is a load, then it uses the live-in value. The last store defines
+ // the live-out value. We handle this by doing a linear scan of the block.
+ BasicBlock *BB = User->getParent();
+ Value *StoredValue = 0;
+ for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+ if (LoadInst *L = dyn_cast<LoadInst>(II)) {
+ // If this is a load from an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(L->getOperand(0))) continue;
+
+ // If we haven't seen a store yet, this is a live-in use, otherwise
+ // use the stored value.
+ if (StoredValue) {
+ L->replaceAllUsesWith(StoredValue);
+ ReplacedLoads[L] = StoredValue;
+ } else {
+ LiveInLoads.push_back(L);
+ }
+ continue;
+ }
+
+ if (StoreInst *S = dyn_cast<StoreInst>(II)) {
+ // If this is a store to an unrelated pointer, ignore it.
+ if (!PointerMustAliases.count(S->getOperand(1))) continue;
- for (AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I)
- ValueToAllocaMap.insert(std::make_pair(I->getValue(), AI));
+ // Remember that this is the active value in the block.
+ StoredValue = S->getOperand(0);
+ }
+ }
+
+ // The last stored value that happened is the live-out for the block.
+ assert(StoredValue && "Already checked that there is a store in block");
+ SSA.AddAvailableValue(BB, StoredValue);
+ BlockUses.clear();
+ }
+
+ // Now that all the intra-loop values are classified, set up the preheader.
+ // It gets a load of the pointer we're promoting, and it is the live-out value
+ // from the preheader.
+ LoadInst *PreheaderLoad = new LoadInst(SomePtr,SomePtr->getName()+".promoted",
+ Preheader->getTerminator());
+ SSA.AddAvailableValue(Preheader, PreheaderLoad);
+
+ // Now that the preheader is good to go, set up the exit blocks. Each exit
+ // block gets a store of the live-out values that feed them. Since we've
+ // already told the SSA updater about the defs in the loop and the preheader
+ // definition, it is all set and we can start using it.
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ CurLoop->getUniqueExitBlocks(ExitBlocks);
+ for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitBlock = ExitBlocks[i];
+ Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
+ Instruction *InsertPos = ExitBlock->getFirstNonPHI();
+ new StoreInst(LiveInValue, SomePtr, InsertPos);
+ }
- DEBUG(dbgs() << "LICM: Promoting value: " << *V << "\n");
+ // Okay, now we rewrite all loads that use live-in values in the loop,
+ // inserting PHI nodes as necessary.
+ for (unsigned i = 0, e = LiveInLoads.size(); i != e; ++i) {
+ LoadInst *ALoad = LiveInLoads[i];
+ Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent());
+ ALoad->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(ALoad, NewVal);
+ ReplacedLoads[ALoad] = NewVal;
+ }
+
+ // If the preheader load is itself a pointer, we need to tell alias analysis
+ // about the new pointer we created in the preheader block and about any PHI
+ // nodes that just got inserted.
+ if (PreheaderLoad->getType()->isPointerTy()) {
+ // Copy any value stored to or loaded from a must-alias of the pointer.
+ CurAST->copyValue(SomeValue, PreheaderLoad);
+
+ for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
+ CurAST->copyValue(SomeValue, NewPHIs[i]);
}
+
+ // Now that everything is rewritten, delete the old instructions from the body
+ // of the loop. They should all be dead now.
+ for (unsigned i = 0, e = LoopUses.size(); i != e; ++i) {
+ Instruction *User = LoopUses[i];
+
+ // If this is a load that still has uses, then the load must have been added
+ // as a live value in the SSAUpdater data structure for a block (e.g. because
+ // the loaded value was stored later). In this case, we need to recursively
+ // propagate the updates until we get to the real value.
+ if (!User->use_empty()) {
+ Value *NewVal = ReplacedLoads[User];
+ assert(NewVal && "not a replaced load?");
+
+ // Propagate down to the ultimate replacee. The intermediate loads
+ // could theoretically already have been deleted, so we don't want to
+ // dereference the Value*'s.
+ DenseMap<Value*, Value*>::iterator RLI = ReplacedLoads.find(NewVal);
+ while (RLI != ReplacedLoads.end()) {
+ NewVal = RLI->second;
+ RLI = ReplacedLoads.find(NewVal);
+ }
+
+ User->replaceAllUsesWith(NewVal);
+ CurAST->copyValue(User, NewVal);
+ }
+
+ CurAST->deleteValue(User);
+ User->eraseFromParent();
+ }
+
+ // Whew, we're done!
}
+
/// cloneBasicBlockAnalysis - Simple Analysis hook. Clone alias set info.
void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
- AliasSetTracker *AST = LoopToAliasMap[L];
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
return;
@@ -873,7 +909,7 @@ void LICM::cloneBasicBlockAnalysis(BasicBlock *From, BasicBlock *To, Loop *L) {
/// deleteAnalysisValue - Simple Analysis hook. Delete value V from alias
/// set.
void LICM::deleteAnalysisValue(Value *V, Loop *L) {
- AliasSetTracker *AST = LoopToAliasMap[L];
+ AliasSetTracker *AST = LoopToAliasSetMap.lookup(L);
if (!AST)
return;
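The per-block scan at the heart of the new PromoteAliasSet — loads before the first store read the live-in value, later loads forward from the last store, and the final store defines the block's live-out — reduces to one linear pass. A self-contained sketch with invented stand-ins for the instructions:

    #include <cstddef>
    #include <vector>

    struct Op { bool IsStore; int Value; }; // Stand-in for a load or store.

    // Returns the block's live-out value, or -1 if it contains no store;
    // LiveInLoads collects the loads that need the cross-block live-in.
    static int scanBlock(std::vector<Op> &Ops,
                         std::vector<std::size_t> &LiveInLoads) {
      int StoredValue = -1;
      bool HaveStore = false;
      for (std::size_t i = 0, e = Ops.size(); i != e; ++i) {
        if (Ops[i].IsStore) {
          StoredValue = Ops[i].Value; // This becomes the active value.
          HaveStore = true;
        } else if (HaveStore) {
          Ops[i].Value = StoredValue; // Forward the last store to this load.
        } else {
          LiveInLoads.push_back(i);   // Load sees the block's live-in value.
        }
      }
      return HaveStore ? StoredValue : -1;
    }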
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index e4894e99b68f1..543dfc1cba096 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -28,7 +28,7 @@ namespace {
class LoopDeletion : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopDeletion() : LoopPass(&ID) {}
+ LoopDeletion() : LoopPass(ID) {}
// Possibly eliminate loop L if it is dead.
bool runOnLoop(Loop* L, LPPassManager& LPM);
@@ -38,9 +38,9 @@ namespace {
bool &Changed, BasicBlock *Preheader);
virtual void getAnalysisUsage(AnalysisUsage& AU) const {
- AU.addRequired<ScalarEvolution>();
AU.addRequired<DominatorTree>();
AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
@@ -55,7 +55,8 @@ namespace {
}
char LoopDeletion::ID = 0;
-static RegisterPass<LoopDeletion> X("loop-deletion", "Delete dead loops");
+INITIALIZE_PASS(LoopDeletion, "loop-deletion",
+ "Delete dead loops", false, false);
Pass* llvm::createLoopDeletionPass() {
return new LoopDeletion();
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 31058e5759a4a..a4336743a8f01 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -74,7 +74,7 @@ namespace {
class LoopIndexSplit : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopIndexSplit() : LoopPass(&ID) {}
+ LoopIndexSplit() : LoopPass(ID) {}
// Index split Loop L. Return true if loop is split.
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -197,8 +197,8 @@ namespace {
}
char LoopIndexSplit::ID = 0;
-static RegisterPass<LoopIndexSplit>
-X("loop-index-split", "Index Split Loops");
+INITIALIZE_PASS(LoopIndexSplit, "loop-index-split",
+ "Index Split Loops", false, false);
Pass *llvm::createLoopIndexSplitPass() {
return new LoopIndexSplit();
@@ -677,7 +677,7 @@ void LoopIndexSplit::removeBlocks(BasicBlock *DeadBB, Loop *LP,
for(pred_iterator PI = pred_begin(FrontierBB), PE = pred_end(FrontierBB);
PI != PE; ++PI) {
BasicBlock *P = *PI;
- if (P == DeadBB || DT->dominates(DeadBB, P))
+ if (DT->dominates(DeadBB, P))
PredBlocks.push_back(P);
}
@@ -799,7 +799,7 @@ void LoopIndexSplit::moveExitCondition(BasicBlock *CondBB, BasicBlock *ActiveBB,
// the dominance frontiers.
for (Loop::block_iterator I = LP->block_begin(), E = LP->block_end();
I != E; ++I) {
- if (*I == CondBB || !DT->dominates(CondBB, *I)) continue;
+ if (!DT->properlyDominates(CondBB, *I)) continue;
DominanceFrontier::iterator BBDF = DF->find(*I);
DominanceFrontier::DomSetType::iterator DomSetI = BBDF->second.begin();
DominanceFrontier::DomSetType::iterator DomSetE = BBDF->second.end();
@@ -1183,7 +1183,7 @@ bool LoopIndexSplit::cleanBlock(BasicBlock *BB) {
bool usedOutsideBB = false;
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI) {
- Instruction *U = cast<Instruction>(UI);
+ Instruction *U = cast<Instruction>(*UI);
if (U->getParent() != BB)
usedOutsideBB = true;
}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 16c4a15d3550a..65acc1d9257ad 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -35,7 +35,7 @@ namespace {
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(&ID) {}
+ LoopRotate() : LoopPass(ID) {}
// Rotate Loop L as many times as possible. Return true if
// loop is rotated at least once.
@@ -43,15 +43,15 @@ namespace {
// LCSSA form makes instruction renaming easier.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<DominanceFrontier>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
- AU.addRequired<LoopInfo>();
- AU.addPreserved<LoopInfo>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
// Helper functions
@@ -79,7 +79,7 @@ namespace {
}
char LoopRotate::ID = 0;
-static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops");
+INITIALIZE_PASS(LoopRotate, "loop-rotate", "Rotate Loops", false, false);
Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
@@ -221,7 +221,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// The value now exits in two versions: the initial value in the preheader
// and the loop "next" value in the original header.
- SSA.Initialize(OrigHeaderVal);
+ SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal);
@@ -261,6 +261,26 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
// NewHeader is now the header of the loop.
L->moveToHeader(NewHeader);
+ // Move the original header to the bottom of the loop, where it now more
+ // naturally belongs. This isn't necessary for correctness, and CodeGen can
+ // usually reorder blocks on its own to fix things like this up, but it's
+ // still nice to keep the IR readable.
+ //
+ // The original header should have only one predecessor at this point, since
+ // we checked that the loop had a proper preheader and unique backedge before
+ // we started.
+ assert(OrigHeader->getSinglePredecessor() &&
+ "Original loop header has too many predecessors after loop rotation!");
+ OrigHeader->moveAfter(OrigHeader->getSinglePredecessor());
+
+ // Also, since this original header only has one predecessor, zap its
+ // PHI nodes, which are now trivial.
+ FoldSingleEntryPHINodes(OrigHeader);
+
+ // TODO: We could just go ahead and merge OrigHeader into its predecessor
+ // at this point, if we don't mind updating dominator info.
+
+ // Establish a new preheader, update dominators, etc.
preserveCanonicalLoopForm(LPM);
++NumRotated;
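FoldSingleEntryPHINodes, called above, lives in Transforms/Utils; for intuition, here is a standalone sketch of the core idea (illustrative, not the real helper):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // In a block with exactly one predecessor, every PHI has exactly one
    // incoming value, so each PHI is just a copy of that value.
    static void foldTrivialPHIs(BasicBlock *BB) {
      while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
        PN->eraseFromParent();
      }
    }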
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 1f9b4156b9cd6..e8dc5d3a640e6 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -161,9 +161,10 @@ RegUseTracker::DropUse(size_t LUIdx) {
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
- if (!RegUsesMap.count(Reg)) return false;
- const SmallBitVector &UsedByIndices =
- RegUsesMap.find(Reg)->second.UsedByIndices;
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ if (I == RegUsesMap.end())
+ return false;
+ const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
int i = UsedByIndices.find_first();
if (i == -1) return false;
if ((size_t)i != LUIdx) return true;
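The rewrite above replaces a count()-then-find() pair with a single find(), walking the map once. The same idiom with a standard container, purely for illustration:

    #include <map>
    #include <string>

    // Reuse the iterator from one find() instead of asking count() first and
    // then searching again on a hit.
    static bool lookupOnce(const std::map<std::string, int> &M,
                           const std::string &Key, int &Out) {
      std::map<std::string, int>::const_iterator I = M.find(Key);
      if (I == M.end())
        return false;
      Out = I->second;
      return true;
    }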
@@ -441,12 +442,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
- const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
- IgnoreSignificantBits);
- if (!Start) return 0;
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
if (!Step) return 0;
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
return SE.getAddRecExpr(Start, Step, AR->getLoop());
}
return 0;
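The distribution above uses the identity {Start,+,Step} / D == {Start/D,+,Step/D}, which holds only when both divisions are exact. A plain-integer sketch of that condition (illustrative; the real code works on SCEV expressions and must also guard against overflow):

    // Fill NewStart/NewStep only if D divides both parts exactly; otherwise
    // the addrec cannot be divided term by term.
    static bool divideAddRecExactly(long Start, long Step, long D,
                                    long &NewStart, long &NewStep) {
      if (D == 0 || Start % D != 0 || Step % D != 0)
        return false;
      NewStart = Start / D;
      NewStep = Step / D;
      return true;
    }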
@@ -505,12 +506,14 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
- S = SE.getAddExpr(NewOps);
+ if (Result != 0)
+ S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
int64_t Result = ExtractImmediate(NewOps.front(), SE);
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ if (Result != 0)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
}
return 0;
@@ -528,12 +531,14 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
- S = SE.getAddExpr(NewOps);
+ if (Result)
+ S = SE.getAddExpr(NewOps);
return Result;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
- S = SE.getAddRecExpr(NewOps, AR->getLoop());
+ if (Result)
+ S = SE.getAddRecExpr(NewOps, AR->getLoop());
return Result;
}
return 0;
@@ -965,6 +970,12 @@ public:
/// may be used.
bool AllFixupsOutsideLoop;
+ /// WidestFixupType - This records the widest use type for any fixup using
+ /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different
+ /// max fixup widths to be equivalent, because the narrower one may be relying
+ /// on the implicit truncation to truncate away bogus bits.
+ const Type *WidestFixupType;
+
/// Formulae - A list of ways to build a value that can satisfy this user.
/// After the list is populated, one of these is selected heuristically and
/// used to formulate a replacement for OperandValToReplace in UserInst.
@@ -976,15 +987,14 @@ public:
LSRUse(KindType K, const Type *T) : Kind(K), AccessTy(T),
MinOffset(INT64_MAX),
MaxOffset(INT64_MIN),
- AllFixupsOutsideLoop(true) {}
+ AllFixupsOutsideLoop(true),
+ WidestFixupType(0) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
void DeleteFormula(Formula &F);
void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
- void check() const;
-
void print(raw_ostream &OS) const;
void dump() const;
};
@@ -1076,13 +1086,16 @@ void LSRUse::print(raw_ostream &OS) const {
for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
E = Offsets.end(); I != E; ++I) {
OS << *I;
- if (next(I) != E)
+ if (llvm::next(I) != E)
OS << ',';
}
OS << '}';
if (AllFixupsOutsideLoop)
OS << ", all-fixups-outside-loop";
+
+ if (WidestFixupType)
+ OS << ", widest fixup type: " << *WidestFixupType;
}
void LSRUse::dump() const {
@@ -1354,6 +1367,10 @@ public:
void FilterOutUndesirableDedicatedRegisters();
size_t EstimateSearchSpaceComplexity() const;
+ void NarrowSearchSpaceByDetectingSupersets();
+ void NarrowSearchSpaceByCollapsingUnrolledCode();
+ void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ void NarrowSearchSpaceByPickingWinnerRegs();
void NarrowSearchSpaceUsingHeuristics();
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
@@ -1587,7 +1604,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
// Add one to the backedge-taken count to get the trip count.
- const SCEV *IterationCount = SE.getAddExpr(BackedgeTakenCount, One);
+ const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
if (IterationCount != SE.getSCEV(Sel)) return Cond;
// Check for a max calculation that matches the pattern. There's no check
@@ -1919,32 +1936,41 @@ void LSRInstance::DeleteUse(LSRUse &LU) {
LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
- // Search all uses for the formula. This could be more clever. Ignore
- // ICmpZero uses because they may contain formulae generated by
- // GenerateICmpZeroScales, in which case adding fixup offsets may
- // be invalid.
+ // Search all uses for the formula. This could be more clever.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
+ // Check whether this use is close enough to OrigLU, to see whether it's
+ // worthwhile looking through its formulae.
+ // Ignore ICmpZero uses because they may contain formulae generated by
+ // GenerateICmpZeroScales, in which case adding fixup offsets may
+ // be invalid.
if (&LU != &OrigLU &&
LU.Kind != LSRUse::ICmpZero &&
LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+ LU.WidestFixupType == OrigLU.WidestFixupType &&
LU.HasFormulaWithSameRegs(OrigF)) {
+ // Scan through this use's formulae.
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
+ // Check to see if this formula has the same registers and symbols
+ // as OrigF.
if (F.BaseRegs == OrigF.BaseRegs &&
F.ScaledReg == OrigF.ScaledReg &&
F.AM.BaseGV == OrigF.AM.BaseGV &&
- F.AM.Scale == OrigF.AM.Scale &&
- LU.Kind) {
+ F.AM.Scale == OrigF.AM.Scale) {
if (F.AM.BaseOffs == 0)
return &LU;
+ // This is the formula where all the registers and symbols matched;
+ // there aren't going to be any others. Since we declined it, we
+ // can skip the rest of the formulae and proceed to the next LSRUse.
break;
}
}
}
}
+ // Nothing looked good.
return 0;
}
@@ -1976,7 +2002,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
for (SmallSetVector<const SCEV *, 4>::const_iterator
I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
- next(I); NewStrideIter != E; ++NewStrideIter) {
+ llvm::next(I); NewStrideIter != E; ++NewStrideIter) {
const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
@@ -2066,6 +2092,10 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
@@ -2195,6 +2225,10 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+ if (!LU.WidestFixupType ||
+ SE.getTypeSizeInBits(LU.WidestFixupType) <
+ SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
+ LU.WidestFixupType = LF.OperandValToReplace->getType();
InsertSupplementalFormula(U, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
@@ -2207,14 +2241,13 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
- SmallVectorImpl<const SCEV *> &UninterestingOps,
const Loop *L,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
+ CollectSubexprs(*I, C, Ops, L, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
@@ -2222,8 +2255,8 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
AR->getLoop()),
- C, Ops, UninterestingOps, L, SE);
- CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
+ C, Ops, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, L, SE);
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2233,17 +2266,13 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, UninterestingOps, L, SE);
+ Ops, L, SE);
return;
}
}
- // Otherwise use the value itself. Loop-variant "unknown" values are
- // uninteresting; we won't be able to do anything meaningful with them.
- if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
- UninterestingOps.push_back(S);
- else
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ // Otherwise use the value itself, optionally with a scale applied.
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2257,19 +2286,19 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
- SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
- CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
-
- // Add any uninteresting values as one register, as we won't be able to
- // form any interesting reassociation opportunities with them. They'll
- // just have to be added inside the loop no matter what we do.
- if (!UninterestingAddOps.empty())
- AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+ SmallVector<const SCEV *, 8> AddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, L, SE);
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
JE = AddOps.end(); J != JE; ++J) {
+
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !(*J)->isLoopInvariant(L))
+ continue;
+
// Don't pull a constant into a register if the constant could be folded
// into an immediate field.
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
@@ -2279,9 +2308,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
// Collect all operands except *J.
SmallVector<const SCEV *, 8> InnerAddOps
- ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
InnerAddOps.append
- (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+ (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
@@ -2377,7 +2406,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
LU.Kind, LU.AccessTy, TLI)) {
// Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
// If it cancelled out, drop the base register, otherwise update it.
if (NewG->isZero()) {
std::swap(F.BaseRegs[i], F.BaseRegs.back());
@@ -2778,6 +2807,10 @@ LSRInstance::GenerateAllReuseFormulae() {
}
GenerateCrossUseConstantOffsets();
+
+ DEBUG(dbgs() << "\n"
+ "After generating reuse formulae:\n";
+ print_uses(dbgs()));
}
/// If their are multiple formulae with the same set of registers used
@@ -2876,11 +2909,11 @@ size_t LSRInstance::EstimateSearchSpaceComplexity() const {
return Power;
}
-/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
-/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extraordinary amount
-/// of time in some worst-case scenarios.
-void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
+/// of the registers of another formula, it won't help reduce register
+/// pressure (though it may not hurt it either); remove
+/// it to simplify the system.
+void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -2938,7 +2971,12 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
+}
+/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers
+/// for expressions like A, A+1, A+2, etc., allocate a single register for
+/// them.
+void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
DEBUG(dbgs() << "The search space is too complex.\n");
@@ -2988,7 +3026,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
if (Fixup.LUIdx == LUIdx) {
Fixup.LUIdx = LUThatHas - &Uses.front();
Fixup.Offset += F.AM.BaseOffs;
- DEBUG(errs() << "New fixup has offset "
+ DEBUG(dbgs() << "New fixup has offset "
<< Fixup.Offset << '\n');
}
if (Fixup.LUIdx == NumUses-1)
@@ -3009,7 +3047,30 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
DEBUG(dbgs() << "After pre-selection:\n";
print_uses(dbgs()));
}
+}
+
+/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call
+/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that
+/// we've done more filtering, as it may be able to find more formulae to
+/// eliminate.
+void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
+ "undesirable dedicated registers.\n");
+
+ FilterOutUndesirableDedicatedRegisters();
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+}
+/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely
+/// to be profitable, and then in any use which has any reference to that
+/// register, delete all formulae which do not reference that register.
+void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// With all other options exhausted, loop until the system is simple
// enough to handle.
SmallPtrSet<const SCEV *, 4> Taken;
@@ -3071,6 +3132,17 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
}
}
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
+/// formulae to choose from, use some rough heuristics to prune down the number
+/// of formulae. This keeps the main solver from taking an extraordinary amount
+/// of time in some worst-case scenarios.
+void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
+ NarrowSearchSpaceByDetectingSupersets();
+ NarrowSearchSpaceByCollapsingUnrolledCode();
+ NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
+ NarrowSearchSpaceByPickingWinnerRegs();
+}
+
/// SolveRecurse - This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
Cost &SolutionCost,
@@ -3614,10 +3686,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
// to formulate the values needed for the uses.
GenerateAllReuseFormulae();
- DEBUG(dbgs() << "\n"
- "After generating reuse formulae:\n";
- print_uses(dbgs()));
-
FilterOutUndesirableDedicatedRegisters();
NarrowSearchSpaceUsingHeuristics();
@@ -3724,15 +3792,15 @@ private:
}
char LoopStrengthReduce::ID = 0;
-static RegisterPass<LoopStrengthReduce>
-X("loop-reduce", "Loop Strength Reduction");
+INITIALIZE_PASS(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false);
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
return new LoopStrengthReduce(TLI);
}
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
- : LoopPass(&ID), TLI(tli) {}
+ : LoopPass(ID), TLI(tli) {}
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
// We split critical edges, so we change the CFG. However, we do update
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 4ad41ae4b59f7..d0edfa2200513 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -17,6 +17,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -26,7 +27,7 @@
using namespace llvm;
static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
+UnrollThreshold("unroll-threshold", cl::init(200), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
static cl::opt<unsigned>
@@ -42,7 +43,7 @@ namespace {
class LoopUnroll : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(&ID) {}
+ LoopUnroll() : LoopPass(ID) {}
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
@@ -55,23 +56,24 @@ namespace {
/// loop preheaders be inserted into the CFG...
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<LoopInfo>();
AU.addPreservedID(LCSSAID);
- AU.addPreserved<LoopInfo>();
+ AU.addPreserved<ScalarEvolution>();
// FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
// If loop unroll does not preserve dom info then LCSSA pass on next
// loop will receive invalid dom info.
// For now, recreate dom info, if loop is unrolled.
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
};
}
char LoopUnroll::ID = 0;
-static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
+INITIALIZE_PASS(LoopUnroll, "loop-unroll", "Unroll loops", false, false);
Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
@@ -145,12 +147,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
// FIXME: Reconstruct dom info, because it is not preserved properly.
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
DT->runOnFunction(*F);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
- if (DF)
- DF->runOnFunction(*F);
- }
return true;
}
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 0c900ffc40277..9afe428ba5691 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -77,7 +77,6 @@ namespace {
bool redoLoop;
Loop *currentLoop;
- DominanceFrontier *DF;
DominatorTree *DT;
BasicBlock *loopHeader;
BasicBlock *loopPreheader;
@@ -92,15 +91,15 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
- LoopPass(&ID), OptimizeForSize(Os), redoLoop(false),
- currentLoop(NULL), DF(NULL), DT(NULL), loopHeader(NULL),
+ LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
+ currentLoop(NULL), DT(NULL), loopHeader(NULL),
loopPreheader(NULL) {}
bool runOnLoop(Loop *L, LPPassManager &LPM);
bool processCurrentLoop();
/// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
+ /// loop preheaders be inserted into the CFG.
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(LoopSimplifyID);
@@ -110,7 +109,6 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
}
private:
@@ -160,7 +158,7 @@ namespace {
};
}
char LoopUnswitch::ID = 0;
-static RegisterPass<LoopUnswitch> X("loop-unswitch", "Unswitch loops");
+INITIALIZE_PASS(LoopUnswitch, "loop-unswitch", "Unswitch loops", false, false);
Pass *llvm::createLoopUnswitchPass(bool Os) {
return new LoopUnswitch(Os);
@@ -201,7 +199,6 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
LI = &getAnalysis<LoopInfo>();
LPM = &LPM_Ref;
- DF = getAnalysisIfAvailable<DominanceFrontier>();
DT = getAnalysisIfAvailable<DominatorTree>();
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
@@ -216,8 +213,6 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
if (DT)
DT->runOnFunction(*F);
- if (DF)
- DF->runOnFunction(*F);
}
return Changed;
}
@@ -282,19 +277,18 @@ bool LoopUnswitch::processCurrentLoop() {
return Changed;
}
-/// isTrivialLoopExitBlock - Check to see if all paths from BB either:
-/// 1. Exit the loop with no side effects.
-/// 2. Branch to the latch block with no side-effects.
+/// isTrivialLoopExitBlock - Check to see if all paths from BB exit the
+/// loop with no side effects (including infinite loops).
///
-/// If these conditions are true, we return true and set ExitBB to the block we
+/// If so, we return true and set ExitBB to the block we
/// exit through.
///
static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
BasicBlock *&ExitBB,
std::set<BasicBlock*> &Visited) {
if (!Visited.insert(BB).second) {
- // Already visited and Ok, end of recursion.
- return true;
+ // Already visited. Without more analysis, this could indicate an infinite loop.
+ return false;
} else if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
// first exit.
@@ -324,7 +318,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
/// process. If so, return the block that is exited to, otherwise return null.
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
- Visited.insert(L->getHeader()); // Branches to header are ok.
+ Visited.insert(L->getHeader()); // Branches to header make infinite loops.
BasicBlock *ExitBB = 0;
if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
return ExitBB;
@@ -356,8 +350,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
if (!BI->isConditional() || BI->getCondition() != Cond)
return false;
- // Check to see if a successor of the branch is guaranteed to go to the
- // latch block or exit through a one exit block without having any
+ // Check to see if a successor of the branch is guaranteed to
+ // exit through a unique exit block without having any
// side-effects. If so, determine the value of Cond that causes it to do
// this.
if ((LoopExitBB = isTrivialLoopExitBlock(currentLoop,
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
new file mode 100644
index 0000000000000..973ffe7e6a40f
--- /dev/null
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -0,0 +1,161 @@
+//===- LowerAtomic.cpp - Lower atomic intrinsics --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers atomic intrinsics to non-atomic form for use in a known
+// non-preemptible environment.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loweratomic"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+bool LowerAtomicIntrinsic(CallInst *CI) {
+ IRBuilder<> Builder(CI->getParent(), CI);
+
+ Function *Callee = CI->getCalledFunction();
+ if (!Callee)
+ return false;
+
+ unsigned IID = Callee->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::memory_barrier:
+ break;
+
+ case Intrinsic::atomic_load_add:
+ case Intrinsic::atomic_load_sub:
+ case Intrinsic::atomic_load_and:
+ case Intrinsic::atomic_load_nand:
+ case Intrinsic::atomic_load_or:
+ case Intrinsic::atomic_load_xor:
+ case Intrinsic::atomic_load_max:
+ case Intrinsic::atomic_load_min:
+ case Intrinsic::atomic_load_umax:
+ case Intrinsic::atomic_load_umin: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Delta = CI->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Res = NULL;
+ switch (IID) {
+ default: assert(0 && "Unrecognized atomic modify operation");
+ case Intrinsic::atomic_load_add:
+ Res = Builder.CreateAdd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_sub:
+ Res = Builder.CreateSub(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_and:
+ Res = Builder.CreateAnd(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_nand:
+ Res = Builder.CreateNot(Builder.CreateAnd(Orig, Delta));
+ break;
+ case Intrinsic::atomic_load_or:
+ Res = Builder.CreateOr(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_xor:
+ Res = Builder.CreateXor(Orig, Delta);
+ break;
+ case Intrinsic::atomic_load_max:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Delta,
+ Orig);
+ break;
+ case Intrinsic::atomic_load_min:
+ Res = Builder.CreateSelect(Builder.CreateICmpSLT(Orig, Delta),
+ Orig,
+ Delta);
+ break;
+ case Intrinsic::atomic_load_umax:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Delta,
+ Orig);
+ break;
+ case Intrinsic::atomic_load_umin:
+ Res = Builder.CreateSelect(Builder.CreateICmpULT(Orig, Delta),
+ Orig,
+ Delta);
+ break;
+ }
+ Builder.CreateStore(Res, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_swap: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Val = CI->getArgOperand(1);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Builder.CreateStore(Val, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ case Intrinsic::atomic_cmp_swap: {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Cmp = CI->getArgOperand(1);
+ Value *Val = CI->getArgOperand(2);
+
+ LoadInst *Orig = Builder.CreateLoad(Ptr);
+ Value *Equal = Builder.CreateICmpEQ(Orig, Cmp);
+ Value *Res = Builder.CreateSelect(Equal, Val, Orig);
+ Builder.CreateStore(Res, Ptr);
+
+ CI->replaceAllUsesWith(Orig);
+ break;
+ }
+
+ default:
+ return false;
+ }
+
+ assert(CI->use_empty() &&
+ "Lowering should have eliminated any uses of the intrinsic call!");
+ CI->eraseFromParent();
+
+ return true;
+}
+
+struct LowerAtomic : public BasicBlockPass {
+ static char ID;
+ LowerAtomic() : BasicBlockPass(ID) {}
+ bool runOnBasicBlock(BasicBlock &BB) {
+ bool Changed = false;
+ for (BasicBlock::iterator DI = BB.begin(), DE = BB.end(); DI != DE; ) {
+ Instruction *Inst = DI++;
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ Changed |= LowerAtomicIntrinsic(CI);
+ }
+ return Changed;
+ }
+
+};
+
+}
+
+char LowerAtomic::ID = 0;
+INITIALIZE_PASS(LowerAtomic, "loweratomic",
+ "Lower atomic intrinsics to non-atomic form",
+ false, false);
+
+Pass *llvm::createLowerAtomicPass() { return new LowerAtomic(); }
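As a usage sketch, the new pass schedules like any other pass through the legacy PassManager; this assumes createLowerAtomicPass is declared in Transforms/Scalar.h as usual (illustrative driver, not part of the patch):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"
    using namespace llvm;

    // Lower all atomic intrinsics in M. Only sound when the code is known to
    // run in a non-preemptible environment, per the pass's header comment.
    static void lowerAtomics(Module &M) {
      PassManager PM;
      PM.add(createLowerAtomicPass());
      PM.run(M);
    }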
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 0e566c5bd9be2..24fae423d2f70 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -304,7 +304,7 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- MemCpyOpt() : FunctionPass(&ID) {}
+ MemCpyOpt() : FunctionPass(ID) {}
private:
// This transformation requires dominator postdominator info
@@ -331,8 +331,7 @@ namespace {
// createMemCpyOptPass - The public interface to this file...
FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
-static RegisterPass<MemCpyOpt> X("memcpyopt",
- "MemCpy Optimization");
+INITIALIZE_PASS(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false);
@@ -374,7 +373,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// If the call is readnone, ignore it, otherwise bail out. We don't even
// allow readonly here because we don't want something like:
// A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
- if (AA.getModRefBehavior(CallSite::get(BI)) ==
+ if (AA.getModRefBehavior(CallSite(BI)) ==
AliasAnalysis::DoesNotAccessMemory)
continue;
@@ -509,7 +508,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// because we'll need to do type comparisons based on the underlying type.
Value *cpyDest = cpy->getDest();
Value *cpySrc = cpy->getSource();
- CallSite CS = CallSite::get(C);
+ CallSite CS(C);
// We need to be able to reason about the size of the memcpy, so we require
// that it be a constant.
@@ -637,10 +636,11 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
return true;
}
-/// processMemCpy - perform simplication of memcpy's. If we have memcpy A which
-/// copies X to Y, and memcpy B which copies Y to Z, then we can rewrite B to be
-/// a memcpy from X to Z (or potentially a memmove, depending on circumstances).
-/// This allows later passes to remove the first memcpy altogether.
+/// processMemCpy - perform simplification of memcpy's. If we have memcpy A
+/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
+/// B to be a memcpy from X to Z (or potentially a memmove, depending on
+/// circumstances). This allows later passes to remove the first memcpy
+/// altogether.
bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
MemoryDependenceAnalysis &MD = getAnalysis<MemoryDependenceAnalysis>();
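The doc comment above describes memcpy forwarding; the shape of the rewrite is easy to see in ordinary C++ (illustrative, not the pass's code):

    #include <cstring>

    // memcpy B's source is itself a fresh copy of X, so B can read from X
    // directly, which may leave memcpy A dead for later passes to remove.
    void before(char *X, char *Y, char *Z, std::size_t N) {
      std::memcpy(Y, X, N);   // memcpy A: X -> Y
      std::memcpy(Z, Y, N);   // memcpy B: Y -> Z
    }
    void after(char *X, char *Y, char *Z, std::size_t N) {
      std::memcpy(Y, X, N);   // A is now possibly dead
      std::memcpy(Z, X, N);   // B rewritten to copy X -> Z
    }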
@@ -744,7 +744,8 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
const Type *ArgTys[3] = { M->getRawDest()->getType(),
M->getRawSource()->getType(),
M->getLength()->getType() };
- M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, ArgTys, 3));
+ M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
+ ArgTys, 3));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 98452f5d82c47..b8afcc12d927d 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -77,7 +77,7 @@ namespace {
bool MadeChange;
public:
static char ID; // Pass identification, replacement for typeid
- Reassociate() : FunctionPass(&ID) {}
+ Reassociate() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
@@ -103,7 +103,8 @@ namespace {
}
char Reassociate::ID = 0;
-static RegisterPass<Reassociate> X("reassociate", "Reassociate expressions");
+INITIALIZE_PASS(Reassociate, "reassociate",
+ "Reassociate expressions", false, false);
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 13222ac22004e..506b72ac34e0d 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -36,7 +36,7 @@ STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
namespace {
struct RegToMem : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- RegToMem() : FunctionPass(&ID) {}
+ RegToMem() : FunctionPass(ID) {}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredID(BreakCriticalEdgesID);
@@ -59,8 +59,8 @@ namespace {
}
char RegToMem::ID = 0;
-static RegisterPass<RegToMem>
-X("reg2mem", "Demote all values to stack slots");
+INITIALIZE_PASS(RegToMem, "reg2mem", "Demote all values to stack slots",
+ false, false);
bool RegToMem::runOnFunction(Function &F) {
@@ -124,7 +124,7 @@ bool RegToMem::runOnFunction(Function &F) {
// createDemoteRegisterToMemory - Provide an entry point to create this pass.
//
-const PassInfo *const llvm::DemoteRegisterToMemoryID = &X;
+char &llvm::DemoteRegisterToMemoryID = RegToMem::ID;
FunctionPass *llvm::createDemoteRegisterToMemoryPass() {
return new RegToMem();
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 907ece8fcce97..6115c05c20ac4 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -275,12 +275,12 @@ public:
return I->second;
}
- LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
+ /*LatticeVal getStructLatticeValueFor(Value *V, unsigned i) const {
DenseMap<std::pair<Value*, unsigned>, LatticeVal>::const_iterator I =
StructValueState.find(std::make_pair(V, i));
assert(I != StructValueState.end() && "V is not in valuemap!");
return I->second;
- }
+ }*/
/// getTrackedRetVals - Get the inferred return value map.
///
@@ -508,17 +508,16 @@ private:
void visitLoadInst (LoadInst &I);
void visitGetElementPtrInst(GetElementPtrInst &I);
void visitCallInst (CallInst &I) {
- visitCallSite(CallSite::get(&I));
+ visitCallSite(&I);
}
void visitInvokeInst (InvokeInst &II) {
- visitCallSite(CallSite::get(&II));
+ visitCallSite(&II);
visitTerminatorInst(II);
}
void visitCallSite (CallSite CS);
void visitUnwindInst (TerminatorInst &I) { /*returns void*/ }
void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ }
void visitAllocaInst (Instruction &I) { markOverdefined(&I); }
- void visitVANextInst (Instruction &I) { markOverdefined(&I); }
void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); }
void visitInstruction(Instruction &I) {
@@ -1586,7 +1585,7 @@ namespace {
///
struct SCCP : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- SCCP() : FunctionPass(&ID) {}
+ SCCP() : FunctionPass(ID) {}
// runOnFunction - Run the Sparse Conditional Constant Propagation
// algorithm, and return true if the function was modified.
@@ -1600,8 +1599,8 @@ namespace {
} // end anonymous namespace
char SCCP::ID = 0;
-static RegisterPass<SCCP>
-X("sccp", "Sparse Conditional Constant Propagation");
+INITIALIZE_PASS(SCCP, "sccp",
+ "Sparse Conditional Constant Propagation", false, false);
// createSCCPPass - This is the public interface to this file.
FunctionPass *llvm::createSCCPPass() {
@@ -1702,14 +1701,15 @@ namespace {
///
struct IPSCCP : public ModulePass {
static char ID;
- IPSCCP() : ModulePass(&ID) {}
+ IPSCCP() : ModulePass(ID) {}
bool runOnModule(Module &M);
};
} // end anonymous namespace
char IPSCCP::ID = 0;
-static RegisterPass<IPSCCP>
-Y("ipsccp", "Interprocedural Sparse Conditional Constant Propagation");
+INITIALIZE_PASS(IPSCCP, "ipsccp",
+ "Interprocedural Sparse Conditional Constant Propagation",
+ false, false);
// createIPSCCPPass - This is the public interface to this file.
ModulePass *llvm::createIPSCCPPass() {
@@ -1748,6 +1748,13 @@ static bool AddressIsTaken(const GlobalValue *GV) {
bool IPSCCP::runOnModule(Module &M) {
SCCPSolver Solver(getAnalysisIfAvailable<TargetData>());
+ // AddressTakenFunctions - This set keeps track of the address-taken functions
+ // that are in the input. As IPSCCP runs through and simplifies code,
+ // functions that were address taken can end up losing their
+ // address-taken-ness. Because of this, we keep track of their addresses from
+ // the first pass so we can use them for the later simplification pass.
+ SmallPtrSet<Function*, 32> AddressTakenFunctions;
+
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
//
@@ -1763,9 +1770,13 @@ bool IPSCCP::runOnModule(Module &M) {
// If this function only has direct calls that we can see, we can track its
// arguments and return value aggressively, and can assume it is not called
// unless we see evidence to the contrary.
- if (F->hasLocalLinkage() && !AddressIsTaken(F)) {
- Solver.AddArgumentTrackedFunction(F);
- continue;
+ if (F->hasLocalLinkage()) {
+ if (AddressIsTaken(F))
+ AddressTakenFunctions.insert(F);
+ else {
+ Solver.AddArgumentTrackedFunction(F);
+ continue;
+ }
}
// Assume the function is called.
@@ -1950,7 +1961,7 @@ bool IPSCCP::runOnModule(Module &M) {
continue;
// We can only do this if we know that nothing else can call the function.
- if (!F->hasLocalLinkage() || AddressIsTaken(F))
+ if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
continue;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index dd445f63320a4..fee317dbd9ab5 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -28,6 +28,7 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
@@ -51,7 +52,7 @@ STATISTIC(NumGlobals, "Number of allocas copied from constant global");
namespace {
struct SROA : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- explicit SROA(signed T = -1) : FunctionPass(&ID) {
+ explicit SROA(signed T = -1) : FunctionPass(ID) {
if (T == -1)
SRThreshold = 128;
else
@@ -114,8 +115,7 @@ namespace {
void DoScalarReplacement(AllocaInst *AI,
std::vector<AllocaInst*> &WorkList);
void DeleteDeadInstructions();
- AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);
-
+
void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
SmallVector<AllocaInst*, 32> &NewElts);
void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
@@ -135,7 +135,8 @@ namespace {
}
char SROA::ID = 0;
-static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
+INITIALIZE_PASS(SROA, "scalarrepl",
+ "Scalar Replacement of Aggregates", false, false);
// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
@@ -193,6 +194,27 @@ private:
};
} // end anonymous namespace.
+
+/// IsVerbotenVectorType - Return true if this is a vector type ScalarRepl isn't
+/// allowed to form. We do this to avoid MMX types, which is a complete hack,
+/// but is required until the backend is fixed.
+static bool IsVerbotenVectorType(const VectorType *VTy, const Instruction *I) {
+ StringRef Triple(I->getParent()->getParent()->getParent()->getTargetTriple());
+ if (!Triple.startswith("i386") &&
+ !Triple.startswith("x86_64"))
+ return false;
+
+ // Reject all the MMX vector types.
+ switch (VTy->getNumElements()) {
+ default: return false;
+ case 1: return VTy->getElementType()->isIntegerTy(64);
+ case 2: return VTy->getElementType()->isIntegerTy(32);
+ case 4: return VTy->getElementType()->isIntegerTy(16);
+ case 8: return VTy->getElementType()->isIntegerTy(8);
+ }
+}
+
+
/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able. This returns the new
/// alloca if possible or null if not.
@@ -209,7 +231,8 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
const Type *NewTy;
- if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector &&
+ !IsVerbotenVectorType(cast<VectorType>(VectorTy), AI)) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
@@ -969,7 +992,7 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length)
isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == CallInst::ArgOffset, Info);
+ UI.getOperandNo() == 0, Info);
else
MarkUnsafe(Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1662,6 +1685,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty))
+ return HasPadding(ATy->getElementType(), TD);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return HasPadding(VTy->getElementType(), TD);
+
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = TD.getStructLayout(STy);
unsigned PrevFieldBitOffset = 0;
@@ -1691,12 +1720,8 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
if (PrevFieldEnd < SL->getSizeInBits())
return true;
}
-
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- return HasPadding(ATy->getElementType(), TD);
- } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- return HasPadding(VTy->getElementType(), TD);
}
+
return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}
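The final comparison of getTypeSizeInBits against getTypeAllocSizeInBits is what catches tail padding; the same effect is visible from plain C++ (illustrative):

    #include <cstdio>

    struct Example { int a; char b; };  // 5 bytes of fields...

    int main() {
      // ...but sizeof is typically 8 on common targets, because of the tail
      // padding that the size-vs-alloc-size check above detects.
      std::printf("%u\n", unsigned(sizeof(Example)));
      return 0;
    }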
@@ -1787,7 +1812,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != CallInst::ArgOffset) return false;
+ if (UI.getOperandNo() != 0) return false;
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource()))
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 49d93a2fcc271..360749caf1116 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -42,14 +42,15 @@ STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
struct CFGSimplifyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGSimplifyPass() : FunctionPass(&ID) {}
+ CFGSimplifyPass() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
};
}
char CFGSimplifyPass::ID = 0;
-static RegisterPass<CFGSimplifyPass> X("simplifycfg", "Simplify the CFG");
+INITIALIZE_PASS(CFGSimplifyPass, "simplifycfg",
+ "Simplify the CFG", false, false);
// Public interface to the CFGSimplification pass
FunctionPass *llvm::createCFGSimplificationPass() {
@@ -284,10 +285,9 @@ static bool IterativeSimplifyCFG(Function &F, const TargetData *TD) {
while (LocalChange) {
LocalChange = false;
- // Loop over all of the basic blocks (except the first one) and remove them
- // if they are unneeded...
+ // Loop over all of the basic blocks and remove them if they are unneeded...
//
- for (Function::iterator BBIt = ++F.begin(); BBIt != F.end(); ) {
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
if (SimplifyCFG(BBIt++, TD)) {
LocalChange = true;
++NumSimpl;
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index c3408e77807fb..3ec70ec2e024f 100644
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -32,7 +32,7 @@ namespace {
const TargetData *TD;
public:
static char ID; // Pass identification
- SimplifyHalfPowrLibCalls() : FunctionPass(&ID) {}
+ SimplifyHalfPowrLibCalls() : FunctionPass(ID) {}
bool runOnFunction(Function &F);
@@ -46,8 +46,8 @@ namespace {
char SimplifyHalfPowrLibCalls::ID = 0;
} // end anonymous namespace.
-static RegisterPass<SimplifyHalfPowrLibCalls>
-X("simplify-libcalls-halfpowr", "Simplify half_powr library calls");
+INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr",
+ "Simplify half_powr library calls", false, false);
// Public interface to the Simplify HalfPowr LibCalls pass.
FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() {
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index b1c619125c355..d7ce53f367153 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -532,7 +532,7 @@ struct StrStrOpt : public LibCallOptimization {
StrLen, B, TD);
for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
UI != UE; ) {
- ICmpInst *Old = cast<ICmpInst>(UI++);
+ ICmpInst *Old = cast<ICmpInst>(*UI++);
Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
ConstantInt::getNullValue(StrNCmp->getType()),
"cmp");
@@ -566,8 +566,8 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD),
- CI->getType());
+ return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0),
+ ToFindStr[0], B, TD), CI->getType());
return 0;
}
};
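The one-character-needle fold above is easy to sanity-check with the C library itself (illustrative):

    #include <cstdio>
    #include <cstring>

    int main() {
      const char *S = "haystack";
      // strstr with a one-character needle lands on the same position strchr
      // does, which is the equivalence the strstr -> strchr fold relies on.
      std::printf("%d\n", std::strstr(S, "y") == std::strchr(S, 'y'));
      return 0;
    }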
@@ -681,8 +681,8 @@ struct MemSetOpt : public LibCallOptimization {
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context),
- false);
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1),
+ Type::getInt8Ty(*Context), false);
EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
return CI->getArgOperand(0);
}
@@ -1042,9 +1042,9 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte.
- ConstantInt::get(TD->getIntPtrType(*Context),
- FormatStr.size()+1), 1, false, B, TD);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the
+ ConstantInt::get(TD->getIntPtrType(*Context), // nul byte.
+ FormatStr.size() + 1), 1, false, B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
@@ -1080,7 +1080,8 @@ struct SPrintFOpt : public LibCallOptimization {
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2),
+ IncLen, 1, false, B, TD);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -1236,7 +1237,7 @@ namespace {
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {}
+ SimplifyLibCalls() : FunctionPass(ID), StrCpy(false), StrCpyChk(true) {}
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1253,8 +1254,8 @@ namespace {
char SimplifyLibCalls::ID = 0;
} // end anonymous namespace.
-static RegisterPass<SimplifyLibCalls>
-X("simplify-libcalls", "Simplify well-known library calls");
+INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls",
+ "Simplify well-known library calls", false, false);
// Public interface to the Simplify LibCalls pass.
FunctionPass *llvm::createSimplifyLibCallsPass() {
@@ -2155,7 +2156,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * pow(pow(x,y),z)-> pow(x,y*z)
//
// puts:
-// * puts("") -> putchar("\n")
+// * puts("") -> putchar('\n')
//
// round, roundf, roundl:
// * round(cnst) -> cnst'
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index b88ba48505092..95d3dedfb62db 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -35,7 +35,7 @@ namespace {
public:
static char ID; // Pass identification
- Sinking() : FunctionPass(&ID) {}
+ Sinking() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -56,8 +56,7 @@ namespace {
} // end anonymous namespace
char Sinking::ID = 0;
-static RegisterPass<Sinking>
-X("sink", "Code sinking");
+INITIALIZE_PASS(Sinking, "sink", "Code sinking", false, false);
FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 9208238f4ba5e..2e437ac778c8c 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -49,7 +49,7 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- TailDup() : FunctionPass(&ID) {}
+ TailDup() : FunctionPass(ID) {}
private:
inline bool shouldEliminateUnconditionalBranch(TerminatorInst *, unsigned);
@@ -59,7 +59,7 @@ namespace {
}
char TailDup::ID = 0;
-static RegisterPass<TailDup> X("tailduplicate", "Tail Duplication");
+INITIALIZE_PASS(TailDup, "tailduplicate", "Tail Duplication", false, false);
// Public interface to the Tail Duplication pass
FunctionPass *llvm::createTailDuplicationPass() { return new TailDup(); }
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 01c8e5d6fcf48..371725467a24e 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -72,7 +72,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced");
namespace {
struct TailCallElim : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- TailCallElim() : FunctionPass(&ID) {}
+ TailCallElim() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -87,7 +87,8 @@ namespace {
}
char TailCallElim::ID = 0;
-static RegisterPass<TailCallElim> X("tailcallelim", "Tail Call Elimination");
+INITIALIZE_PASS(TailCallElim, "tailcallelim",
+ "Tail Call Elimination", false, false);
// Public interface to the TailCallElimination pass
FunctionPass *llvm::createTailCallEliminationPass() {
@@ -277,22 +278,22 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
Value *ReturnedValue = 0;
- for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
- if (RI != IgnoreRI) {
- Value *RetOp = RI->getOperand(0);
-
- // We can only perform this transformation if the value returned is
- // evaluatable at the start of the initial invocation of the function,
- // instead of at the end of the evaluation.
- //
- if (!isDynamicConstant(RetOp, CI, RI))
- return 0;
-
- if (ReturnedValue && RetOp != ReturnedValue)
- return 0; // Cannot transform if differing values are returned.
- ReturnedValue = RetOp;
- }
+ for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
+ ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
+ if (RI == 0 || RI == IgnoreRI) continue;
+
+ // We can only perform this transformation if the value returned is
+ // evaluatable at the start of the initial invocation of the function,
+ // instead of at the end of the evaluation.
+ //
+ Value *RetOp = RI->getOperand(0);
+ if (!isDynamicConstant(RetOp, CI, RI))
+ return 0;
+
+ if (ReturnedValue && RetOp != ReturnedValue)
+ return 0; // Cannot transform if differing values are returned.
+ ReturnedValue = RetOp;
+ }
return ReturnedValue;
}
@@ -306,7 +307,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
assert(I->getNumOperands() == 2 &&
"Associative/commutative operations should have 2 args!");
- // Exactly one operand should be the result of the call instruction...
+ // Exactly one operand should be the result of the call instruction.
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
(I->getOperand(0) != CI && I->getOperand(1) != CI))
return 0;
@@ -386,21 +387,22 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// tail call if all of the instructions between the call and the return are
// movable to above the call itself, leaving the call next to the return.
// Check that this is the case now.
- for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
- if (!CanMoveAboveCall(BBI, CI)) {
- // If we can't move the instruction above the call, it might be because it
- // is an associative and commutative operation that could be tranformed
- // using accumulator recursion elimination. Check to see if this is the
- // case, and if so, remember the initial accumulator value for later.
- if ((AccumulatorRecursionEliminationInitVal =
- CanTransformAccumulatorRecursion(BBI, CI))) {
- // Yes, this is accumulator recursion. Remember which instruction
- // accumulates.
- AccumulatorRecursionInstr = BBI;
- } else {
- return false; // Otherwise, we cannot eliminate the tail recursion!
- }
+ for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) {
+ if (CanMoveAboveCall(BBI, CI)) continue;
+
+ // If we can't move the instruction above the call, it might be because it
+ // is an associative and commutative operation that could be transformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
+ if ((AccumulatorRecursionEliminationInitVal =
+ CanTransformAccumulatorRecursion(BBI, CI))) {
+ // Yes, this is accumulator recursion. Remember which instruction
+ // accumulates.
+ AccumulatorRecursionInstr = BBI;
+ } else {
+ return false; // Otherwise, we cannot eliminate the tail recursion!
}
+ }
// We can only transform call/return pairs that either ignore the return value
// of the call and return void, ignore the value of the call and return a
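The accumulator-recursion case handled above is the classic factorial shape; a self-contained before/after illustration of the idea (not the pass's actual output):

    // The multiply after the recursive call prevents a tail call...
    static int factRecursive(int N) {
      if (N <= 1) return 1;
      return N * factRecursive(N - 1);
    }

    // ...but multiplication is associative and commutative, so the pending
    // work can be carried in an accumulator (the role played above by
    // AccumulatorRecursionEliminationInitVal), leaving a simple loop.
    static int factAccumulated(int N) {
      int Acc = 1;
      while (N > 1) {
        Acc *= N;
        --N;
      }
      return Acc;
    }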
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index ec625b4cbb28f..093083a630cf3 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -97,23 +97,13 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB) {
/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
/// if possible. The return value indicates success or failure.
bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
- pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
- // Can't merge the entry block. Don't merge away blocks who have their
- // address taken: this is a bug if the predecessor block is the entry node
- // (because we'd end up taking the address of the entry) and undesirable in
- // any case.
- if (pred_begin(BB) == pred_end(BB) ||
- BB->hasAddressTaken()) return false;
+ // Don't merge away blocks whose address is taken.
+ if (BB->hasAddressTaken()) return false;
- BasicBlock *PredBB = *PI++;
- for (; PI != PE; ++PI) // Search all predecessors, see if they are all same
- if (*PI != PredBB) {
- PredBB = 0; // There are multiple different predecessors...
- break;
- }
-
- // Can't merge if there are multiple predecessors.
+ // Can't merge if there are multiple predecessors, or no predecessors.
+ BasicBlock *PredBB = BB->getUniquePredecessor();
if (!PredBB) return false;
+
// Don't break self-loops.
if (PredBB == BB) return false;
// Don't break invokes.
@@ -267,7 +257,7 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
case Instruction::Switch: // Should remove entry
default:
case Instruction::Ret: // Cannot happen, has no successors!
- llvm_unreachable("Unhandled terminator instruction type in RemoveSuccessor!");
+ llvm_unreachable("Unhandled terminator inst type in RemoveSuccessor!");
}
if (NewTI) // If it's a different instruction, replace.
@@ -421,7 +411,8 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
DominatorTree *DT = P ? P->getAnalysisIfAvailable<DominatorTree>() : 0;
if (DT)
DT->splitBlock(NewBB);
- if (DominanceFrontier *DF = P ? P->getAnalysisIfAvailable<DominanceFrontier>():0)
+ if (DominanceFrontier *DF =
+ P ? P->getAnalysisIfAvailable<DominanceFrontier>() : 0)
DF->splitBlock(NewBB);
// Insert a new PHI node into NewBB for every PHI node in BB and that new PHI
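
The rewritten MergeBlockIntoPredecessor leans on BasicBlock::getUniquePredecessor(), which returns the single distinct predecessor even if it reaches the block along several edges (e.g. both arms of a conditional branch), and null otherwise. A hand-rolled equivalent of that contract over a plain adjacency list, as a sketch (standard C++; the function name is made up):

    #include <vector>

    // Return the unique predecessor of block `bb`, or -1 when it has no
    // predecessors or more than one distinct predecessor.
    int uniquePredecessor(const std::vector<std::vector<int> > &preds, int bb) {
      int unique = -1;
      for (size_t i = 0; i < preds[bb].size(); ++i) {
        if (unique == -1)
          unique = preds[bb][i];          // first predecessor seen
        else if (preds[bb][i] != unique)
          return -1;                      // a second distinct predecessor
      }
      return unique;                      // -1 when the list was empty
    }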
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp
index f0e31efa30c43..23a30cc585077 100644
--- a/lib/Transforms/Utils/BasicInliner.cpp
+++ b/lib/Transforms/Utils/BasicInliner.cpp
@@ -82,8 +82,8 @@ void BasicInlinerImpl::inlineFunctions() {
Function *F = *FI;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
- CallSite CS = CallSite::get(I);
- if (CS.getInstruction() && CS.getCalledFunction()
+ CallSite CS(cast<Value>(I));
+ if (CS && CS.getCalledFunction()
&& !CS.getCalledFunction()->isDeclaration())
CallSites.push_back(CS);
}
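
The BasicInliner change reflects the retirement of CallSite::get: the replacement constructor accepts any Value and yields a null CallSite when the value is not a call or invoke, which the boolean test then filters. The idiom in isolation, as a hedged sketch against the API of this era (the helper name is illustrative):

    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/CallSite.h"
    using namespace llvm;

    static bool isInlinableCall(Instruction *I) {
      CallSite CS(cast<Value>(I));   // null CallSite if I is not call/invoke
      return CS && CS.getCalledFunction() &&
             !CS.getCalledFunction()->isDeclaration();
    }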
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 26f53c05a042f..f75ffe6105fa6 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -36,7 +36,7 @@ STATISTIC(NumBroken, "Number of blocks inserted");
namespace {
struct BreakCriticalEdges : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- BreakCriticalEdges() : FunctionPass(&ID) {}
+ BreakCriticalEdges() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
@@ -53,11 +53,11 @@ namespace {
}
char BreakCriticalEdges::ID = 0;
-static RegisterPass<BreakCriticalEdges>
-X("break-crit-edges", "Break critical edges in CFG");
+INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges",
+ "Break critical edges in CFG", false, false);
// Publicly exposed interface to pass...
-const PassInfo *const llvm::BreakCriticalEdgesID = &X;
+char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID;
FunctionPass *llvm::createBreakCriticalEdgesPass() {
return new BreakCriticalEdges();
}
@@ -225,7 +225,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
UI != E; ) {
Value::use_iterator Use = UI++;
- if (PHINode *PN = dyn_cast<PHINode>(Use)) {
+ if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
// Remove one entry from each PHI.
if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
PN->setOperand(Use.getOperandNo(), NewBB);
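
Note the switch from dyn_cast<PHINode>(Use) to dyn_cast<PHINode>(*Use): a Value::use_iterator now has to be dereferenced to reach the using instruction, while the iterator itself still answers positional queries such as getOperandNo(), exactly as the hunk above does. The pattern isolated (era API; the helper is illustrative):

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Count how many uses of V sit in PHI nodes.
    static unsigned countPHIUses(Value *V) {
      unsigned N = 0;
      for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
           UI != E; ++UI)
        if (isa<PHINode>(*UI))   // *UI is the User; UI.getOperandNo() would
          ++N;                   // give the operand slot, as used above
      return N;
    }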
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 7a9d007ed5583..c3139498c2504 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -421,9 +421,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
- if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
- EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
- 1, false, B, TD);
+ if (isFoldable(3, 2, false)) {
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -444,9 +444,9 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
- if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
- EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
- 1, false, B, TD);
+ if (isFoldable(3, 2, false)) {
+ EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -462,10 +462,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
- if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ if (isFoldable(3, 2, false)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
false);
- EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2),
+ false, B, TD);
replaceCall(CI->getArgOperand(0));
return true;
}
@@ -487,7 +488,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths when they vary.
- if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) {
+ if (isFoldable(2, 1, true)) {
Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
Name.substr(2, 6));
replaceCall(Ret);
@@ -505,7 +506,7 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
- if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ if (isFoldable(3, 2, false)) {
Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), B, TD, Name.substr(2, 7));
replaceCall(Ret);
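
Background on these folds: each fortified routine takes a trailing destination-object size, e.g. __memcpy_chk(dst, src, len, dstsize), and aborts at run time when len exceeds dstsize; once the optimizer proves len <= dstsize (the isFoldable check, whose indices are now plain zero-based argument numbers after the CallInst::ArgOffset removal), the call can be lowered to the ordinary libc routine. A source-level illustration, assuming glibc-style _FORTIFY_SOURCE semantics:

    #include <cstring>

    void copy_fixed(char (&dst)[16], const char *src16) {
      // src16 is assumed to point at >= 16 readable bytes. Under
      // fortification this becomes __memcpy_chk(dst, src16, 16, 16);
      // the length provably fits, so it folds back to plain memcpy.
      std::memcpy(dst, src16, 16);
    }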
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index dec227acafd27..61cbeb2bd35b9 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -20,7 +20,6 @@ add_llvm_library(LLVMTransformUtils
Mem2Reg.cpp
PromoteMemoryToRegister.cpp
SSAUpdater.cpp
- SSI.cpp
SimplifyCFG.cpp
UnifyFunctionExitNodes.cpp
ValueMapper.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1dcfd57878466..f43186edae435 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -23,7 +23,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
#include "llvm/Support/CFG.h"
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ADT/SmallVector.h"
@@ -69,10 +69,11 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
}
// Clone OldFunc into NewFunc, transforming the old arguments into references to
-// ArgMap values.
+// VMap values.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
@@ -126,7 +127,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
BE = NewFunc->end(); BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
- RemapInstruction(II, VMap);
+ RemapInstruction(II, VMap, ModuleLevelChanges);
}
/// CloneFunction - Return a copy of the specified function, but without
@@ -139,6 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
///
Function *llvm::CloneFunction(const Function *F,
ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
ClonedCodeInfo *CodeInfo) {
std::vector<const Type*> ArgTypes;
@@ -167,7 +169,7 @@ Function *llvm::CloneFunction(const Function *F,
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo);
+ CloneFunctionInto(NewF, F, VMap, ModuleLevelChanges, Returns, "", CodeInfo);
return NewF;
}
@@ -180,6 +182,7 @@ namespace {
Function *NewFunc;
const Function *OldFunc;
ValueToValueMapTy &VMap;
+ bool ModuleLevelChanges;
SmallVectorImpl<ReturnInst*> &Returns;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
@@ -187,12 +190,14 @@ namespace {
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap,
+ bool moduleLevelChanges,
SmallVectorImpl<ReturnInst*> &returns,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
const TargetData *td)
- : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns),
- NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
+ : NewFunc(newFunc), OldFunc(oldFunc),
+ VMap(valueMap), ModuleLevelChanges(moduleLevelChanges),
+ Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
}
/// CloneBlock - The specified block is found to be reachable, clone it and
@@ -313,7 +318,7 @@ ConstantFoldMappedInstruction(const Instruction *I) {
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- VMap)))
+ VMap, ModuleLevelChanges)))
Ops.push_back(Op);
else
return 0; // All operands not constant!
@@ -334,25 +339,16 @@ ConstantFoldMappedInstruction(const Instruction *I) {
Ops.size(), TD);
}
-static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) {
- DILocation ILoc(InsnMD);
- if (!ILoc.Verify()) return InsnMD;
+static DebugLoc
+UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL,
+ LLVMContext &Ctx) {
+ DebugLoc NewLoc = TheCallDL;
+ if (MDNode *IA = InsnDL.getInlinedAt(Ctx))
+ NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL,
+ Ctx);
- DILocation CallLoc(TheCallMD);
- if (!CallLoc.Verify()) return InsnMD;
-
- DILocation OrigLocation = ILoc.getOrigLocation();
- MDNode *NewLoc = TheCallMD;
- if (OrigLocation.Verify())
- NewLoc = UpdateInlinedAtInfo(OrigLocation, TheCallMD);
-
- Value *MDVs[] = {
- InsnMD->getOperand(0), // Line
- InsnMD->getOperand(1), // Col
- InsnMD->getOperand(2), // Scope
- NewLoc
- };
- return MDNode::get(InsnMD->getContext(), MDVs, 4);
+ return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(),
+ InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx));
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -364,6 +360,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) {
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
@@ -377,8 +374,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
assert(VMap.count(II) && "No mapping from source argument specified!");
#endif
- PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns,
- NameSuffix, CodeInfo, TD);
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
+ Returns, NameSuffix, CodeInfo, TD);
// Clone the entry block, and anything recursively reachable from it.
std::vector<const BasicBlock*> CloneWorklist;
@@ -408,10 +405,9 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
//
BasicBlock::iterator I = NewBB->begin();
- unsigned DbgKind = OldFunc->getContext().getMDKindID("dbg");
- MDNode *TheCallMD = NULL;
- if (TheCall && TheCall->hasMetadata())
- TheCallMD = TheCall->getMetadata(DbgKind);
+ DebugLoc TheCallDL;
+ if (TheCall)
+ TheCallDL = TheCall->getDebugLoc();
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
@@ -420,15 +416,17 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
BasicBlock::const_iterator OldI = BI->begin();
for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
if (I->hasMetadata()) {
- if (TheCallMD) {
- if (MDNode *IMD = I->getMetadata(DbgKind)) {
- MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD);
- I->setMetadata(DbgKind, NewMD);
+ if (!TheCallDL.isUnknown()) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (!IDL.isUnknown()) {
+ DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
+ I->getContext());
+ I->setDebugLoc(NewDL);
}
} else {
// The cloned instruction has dbg info but the call instruction
// does not have dbg info. Remove dbg info from cloned instruction.
- I->setMetadata(DbgKind, 0);
+ I->setDebugLoc(DebugLoc());
}
}
PHIToResolve.push_back(cast<PHINode>(OldI));
@@ -444,18 +442,20 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// Otherwise, remap the rest of the instructions normally.
for (; I != NewBB->end(); ++I) {
if (I->hasMetadata()) {
- if (TheCallMD) {
- if (MDNode *IMD = I->getMetadata(DbgKind)) {
- MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD);
- I->setMetadata(DbgKind, NewMD);
+ if (!TheCallDL.isUnknown()) {
+ DebugLoc IDL = I->getDebugLoc();
+ if (!IDL.isUnknown()) {
+ DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
+ I->getContext());
+ I->setDebugLoc(NewDL);
}
} else {
// The cloned instruction has dbg info but the call instruction
// does not have dbg info. Remove dbg info from cloned instruction.
- I->setMetadata(DbgKind, 0);
+ I->setDebugLoc(DebugLoc());
}
}
- RemapInstruction(I, VMap);
+ RemapInstruction(I, VMap, ModuleLevelChanges);
}
}
@@ -477,7 +477,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
if (BasicBlock *MappedBlock =
cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- VMap);
+ VMap, ModuleLevelChanges);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
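
The rewritten UpdateInlinedAtInfo appends the call site's location to the end of the instruction's inlined-at chain, so a single DebugLoc records the whole stack of inlinings. A small helper built only from calls visible in this hunk (a sketch, not part of the change; the function name is invented):

    #include "llvm/LLVMContext.h"
    #include "llvm/Support/DebugLoc.h"
    using namespace llvm;

    // Depth of the inlined-at chain hanging off DL (0 = not inlined).
    static unsigned inlineDepth(const DebugLoc &DL, LLVMContext &Ctx) {
      unsigned Depth = 0;
      for (MDNode *IA = DL.getInlinedAt(Ctx); IA;
           IA = DebugLoc::getFromDILocation(IA).getInlinedAt(Ctx))
        ++Depth;
      return Depth;
    }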
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index fc603d23e9ace..b347bf597f8ef 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -17,7 +17,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/TypeSymbolTable.h"
#include "llvm/Constant.h"
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
/// CloneModule - Return an exact copy of the specified module. This is not as
@@ -89,7 +89,8 @@ Module *llvm::CloneModule(const Module *M,
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- VMap)));
+ VMap,
+ true)));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -108,7 +109,7 @@ Module *llvm::CloneModule(const Module *M,
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, I, VMap, Returns);
+ CloneFunctionInto(F, I, VMap, /*ModuleLevelChanges=*/true, Returns);
}
F->setLinkage(I->getLinkage());
@@ -120,34 +121,17 @@ Module *llvm::CloneModule(const Module *M,
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
GA->setLinkage(I->getLinkage());
if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, VMap)));
+ GA->setAliasee(cast<Constant>(MapValue(C, VMap, true)));
}
// And named metadata....
for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
E = M->named_metadata_end(); I != E; ++I) {
const NamedMDNode &NMD = *I;
- SmallVector<MDNode*, 4> MDs;
+ NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- MDs.push_back(cast<MDNode>(MapValue(NMD.getOperand(i), VMap)));
- NamedMDNode::Create(New->getContext(), NMD.getName(),
- MDs.data(), MDs.size(), New);
+ NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap, true)));
}
- // Update metadata attach with instructions.
- for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI)
- for (Function::iterator FI = MI->begin(), FE = MI->end();
- FI != FE; ++FI)
- for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI) {
- SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs;
- BI->getAllMetadata(MDs);
- for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator
- MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) {
- Value *MappedValue = MapValue(MDI->second, VMap);
- if (MDI->second != MappedValue && MappedValue)
- BI->setMetadata(MDI->first, cast<MDNode>(MappedValue));
- }
- }
return New;
}
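
CloneModule passes ModuleLevelChanges=true throughout because a whole-module clone can leave even non-function-local metadata pointing at the old module's globals, so identity-mapping MDNodes is no longer safe. The call shape in isolation (API as it appears in this patch; the helper is illustrative):

    #include "llvm/Constants.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"
    using namespace llvm;

    static Constant *remapInitializer(Constant *Init, ValueToValueMapTy &VMap) {
      // true = module-level clone: metadata operands must be rewritten too.
      return cast<Constant>(MapValue(Init, VMap, /*ModuleLevelChanges=*/true));
    }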
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 598e7d29e3781..88979e862df26 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -215,12 +215,12 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
if (I->second->getFunction() == 0)
if (Function *F = CallSite(NewCall).getCalledFunction()) {
// Indirect call site resolved to direct call.
- CallerNode->addCalledFunction(CallSite::get(NewCall), CG[F]);
-
+ CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
+
continue;
}
-
- CallerNode->addCalledFunction(CallSite::get(NewCall), I->second);
+
+ CallerNode->addCalledFunction(CallSite(NewCall), I->second);
}
// Update the call graph by deleting the edge from Callee to Caller. We must
@@ -365,7 +365,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
- CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i",
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,
+ /*ModuleLevelChanges=*/false, Returns, ".i",
&InlinedFunctionInfo, IFI.TD, TheCall);
// Remember the first block that is newly cloned over.
diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp
index 090af95c4b87b..5ca82996b42f4 100644
--- a/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/lib/Transforms/Utils/InstructionNamer.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
namespace {
struct InstNamer : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- InstNamer() : FunctionPass(&ID) {}
+ InstNamer() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &Info) const {
Info.setPreservesAll();
@@ -48,12 +48,12 @@ namespace {
};
char InstNamer::ID = 0;
- static RegisterPass<InstNamer> X("instnamer",
- "Assign names to anonymous instructions");
+ INITIALIZE_PASS(InstNamer, "instnamer",
+ "Assign names to anonymous instructions", false, false);
}
-const PassInfo *const llvm::InstructionNamerID = &X;
+char &llvm::InstructionNamerID = InstNamer::ID;
//===----------------------------------------------------------------------===//
//
// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index e90c30bba78e2..275b26508f991 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -47,7 +47,7 @@ STATISTIC(NumLCSSA, "Number of live out of a loop variables");
namespace {
struct LCSSA : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LCSSA() : LoopPass(&ID) {}
+ LCSSA() : LoopPass(ID) {}
// Cached analysis information for the current function.
DominatorTree *DT;
@@ -64,22 +64,13 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- // LCSSA doesn't actually require LoopSimplify, but the PassManager
- // doesn't know how to schedule LoopSimplify by itself.
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredTransitive<LoopInfo>();
- AU.addPreserved<LoopInfo>();
- AU.addRequiredTransitive<DominatorTree>();
- AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<DominatorTree>();
AU.addPreserved<DominatorTree>();
-
- // Request DominanceFrontier now, even though LCSSA does
- // not use it. This allows Pass Manager to schedule Dominance
- // Frontier early enough such that one LPPassManager can handle
- // multiple loop transformation passes.
- AU.addRequired<DominanceFrontier>();
AU.addPreserved<DominanceFrontier>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addPreserved<ScalarEvolution>();
}
private:
bool ProcessInstruction(Instruction *Inst,
@@ -99,10 +90,10 @@ namespace {
}
char LCSSA::ID = 0;
-static RegisterPass<LCSSA> X("lcssa", "Loop-Closed SSA Form Pass");
+INITIALIZE_PASS(LCSSA, "lcssa", "Loop-Closed SSA Form Pass", false, false);
Pass *llvm::createLCSSAPass() { return new LCSSA(); }
-const PassInfo *const llvm::LCSSAID = &X;
+char &llvm::LCSSAID = LCSSA::ID;
/// BlockDominatesAnExit - Return true if the specified block dominates at least
@@ -215,7 +206,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
DomTreeNode *DomNode = DT->getNode(DomBB);
SSAUpdater SSAUpdate;
- SSAUpdate.Initialize(Inst);
+ SSAUpdate.Initialize(Inst->getType(), Inst->getName());
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 8e9113871f47b..52f0499f39b0e 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -490,6 +490,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
/// rewriting all the predecessors to branch to the successor block and return
/// true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ assert(BB != &BB->getParent()->getEntryBlock() &&
+ "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
+
// We can't eliminate infinite loops.
BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
if (BB == Succ) return false;
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 4f4edf3a754c1..b3c4801a4f15b 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -46,9 +46,9 @@
#include "llvm/LLVMContext.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
@@ -65,27 +65,30 @@ STATISTIC(NumNested , "Number of nested loops split out");
namespace {
struct LoopSimplify : public LoopPass {
static char ID; // Pass identification, replacement for typeid
- LoopSimplify() : LoopPass(&ID) {}
+ LoopSimplify() : LoopPass(ID) {}
// AA - If we have an alias analysis object to update, this is it, otherwise
// this is null.
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
+ ScalarEvolution *SE;
Loop *L;
virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// We need loop information to identify the loops...
- AU.addRequiredTransitive<LoopInfo>();
- AU.addRequiredTransitive<DominatorTree>();
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
+
AU.addPreserved<AliasAnalysis>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+ AU.addPreserved<DominanceFrontier>();
+ AU.addPreservedID(LCSSAID);
}
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
@@ -104,11 +107,11 @@ namespace {
}
char LoopSimplify::ID = 0;
-static RegisterPass<LoopSimplify>
-X("loopsimplify", "Canonicalize natural loops", true);
+INITIALIZE_PASS(LoopSimplify, "loopsimplify",
+ "Canonicalize natural loops", true, false);
// Publicly exposed interface to pass...
-const PassInfo *const llvm::LoopSimplifyID = &X;
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
@@ -120,6 +123,7 @@ bool LoopSimplify::runOnLoop(Loop *l, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
AA = getAnalysisIfAvailable<AliasAnalysis>();
DT = &getAnalysis<DominatorTree>();
+ SE = getAnalysisIfAvailable<ScalarEvolution>();
Changed |= ProcessLoop(L, LPM);
@@ -141,15 +145,16 @@ ReprocessLoop:
BB != E; ++BB) {
if (*BB == L->getHeader()) continue;
- SmallPtrSet<BasicBlock *, 4> BadPreds;
- for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI){
+ SmallPtrSet<BasicBlock*, 4> BadPreds;
+ for (pred_iterator PI = pred_begin(*BB),
+ PE = pred_end(*BB); PI != PE; ++PI) {
BasicBlock *P = *PI;
if (!L->contains(P))
BadPreds.insert(P);
}
// Delete each unique out-of-loop (and thus dead) predecessor.
- for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(),
+ for (SmallPtrSet<BasicBlock*, 4>::iterator I = BadPreds.begin(),
E = BadPreds.end(); I != E; ++I) {
DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor ";
@@ -530,6 +535,12 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+ // If ScalarEvolution is around and knows anything about values in
+ // this loop, tell it to forget them, because we're about to
+ // substantially change it.
+ if (SE)
+ SE->forgetLoop(L);
+
BasicBlock *Header = L->getHeader();
BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
OuterLoopPreds.size(),
@@ -619,6 +630,11 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
BasicBlock *P = *I;
+
+ // Indirectbr edges cannot be split, so we must fail if we find one.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return 0;
+
if (P != Preheader) BackedgeBlocks.push_back(P);
}
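
Two defensive patterns show up in this file and in LoopUnroll below: a pass that is about to restructure a loop should drop ScalarEvolution's cached facts about it, and it must refuse to split an edge whose source ends in an indirectbr. Condensed into one guard, as a sketch (era API; the helper name is invented):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/CFG.h"
    using namespace llvm;

    static bool prepareLoopForSurgery(Loop *L, ScalarEvolution *SE,
                                      BasicBlock *Header) {
      for (pred_iterator PI = pred_begin(Header), E = pred_end(Header);
           PI != E; ++PI)
        if (isa<IndirectBrInst>((*PI)->getTerminator()))
          return false;          // indirectbr edges cannot be split
      if (SE)
        SE->forgetLoop(L);       // invalidate cached trip counts and SCEVs
      return true;
    }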
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index e0e07e7bbc821..236bbe9057bfc 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -127,6 +128,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
return false;
}
+ // Notify ScalarEvolution that the loop will be substantially changed,
+ // if not outright eliminated.
+ if (ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>())
+ SE->forgetLoop(L);
+
// Find trip count
unsigned TripCount = L->getSmallConstantTripCount();
// Find trip multiple if count is not available
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 2696e6913f3bf..a46dd8402aca7 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -78,14 +78,14 @@ namespace {
static char ID; // Pass identification, replacement for typeid
explicit LowerInvoke(const TargetLowering *tli = NULL,
bool useExpensiveEHSupport = ExpensiveEHSupport)
- : FunctionPass(&ID), useExpensiveEHSupport(useExpensiveEHSupport),
+ : FunctionPass(ID), useExpensiveEHSupport(useExpensiveEHSupport),
TLI(tli) { }
bool doInitialization(Module &M);
bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// This is a cluster of orthogonal Transforms
- AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreserved("mem2reg");
AU.addPreservedID(LowerSwitchID);
}
@@ -100,10 +100,11 @@ namespace {
}
char LowerInvoke::ID = 0;
-static RegisterPass<LowerInvoke>
-X("lowerinvoke", "Lower invoke and unwind, for unwindless code generators");
+INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+ "Lower invoke and unwind, for unwindless code generators",
+ false, false);
-const PassInfo *const llvm::LowerInvokePassID = &X;
+char &llvm::LowerInvokePassID = LowerInvoke::ID;
// Public Interface To the LowerInvoke pass.
FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI) {
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 468a5fe4c5e5d..5530b4700aac6 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -29,19 +29,18 @@ using namespace llvm;
namespace {
/// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
- /// instructions. Note that this cannot be a BasicBlock pass because it
- /// modifies the CFG!
+ /// instructions.
class LowerSwitch : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- LowerSwitch() : FunctionPass(&ID) {}
+ LowerSwitch() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
// This is a cluster of orthogonal Transforms
AU.addPreserved<UnifyFunctionExitNodes>();
- AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreserved("mem2reg");
AU.addPreservedID(LowerInvokePassID);
}
@@ -50,8 +49,7 @@ namespace {
Constant* High;
BasicBlock* BB;
- CaseRange() : Low(0), High(0), BB(0) { }
- CaseRange(Constant* low, Constant* high, BasicBlock* bb) :
+ CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
Low(low), High(high), BB(bb) { }
};
@@ -81,11 +79,11 @@ namespace {
}
char LowerSwitch::ID = 0;
-static RegisterPass<LowerSwitch>
-X("lowerswitch", "Lower SwitchInst's to branches");
+INITIALIZE_PASS(LowerSwitch, "lowerswitch",
+ "Lower SwitchInst's to branches", false, false);
// Publicly exposed interface to pass...
-const PassInfo *const llvm::LowerSwitchID = &X;
+char &llvm::LowerSwitchID = LowerSwitch::ID;
// createLowerSwitchPass - Interface to this file...
FunctionPass *llvm::createLowerSwitchPass() {
return new LowerSwitch();
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index 99203b662120e..101645bd92b77 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -27,7 +27,7 @@ STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
struct PromotePass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- PromotePass() : FunctionPass(&ID) {}
+ PromotePass() : FunctionPass(ID) {}
// runOnFunction - To run this pass, first we calculate the alloca
// instructions that are safe for promotion, then we promote each one.
@@ -49,7 +49,8 @@ namespace {
} // end of anonymous namespace
char PromotePass::ID = 0;
-static RegisterPass<PromotePass> X("mem2reg", "Promote Memory to Register");
+INITIALIZE_PASS(PromotePass, "mem2reg", "Promote Memory to Register",
+ false, false);
bool PromotePass::runOnFunction(Function &F) {
std::vector<AllocaInst*> Allocas;
@@ -81,8 +82,6 @@ bool PromotePass::runOnFunction(Function &F) {
return Changed;
}
-// Publically exposed interface to pass...
-const PassInfo *const llvm::PromoteMemoryToRegisterID = &X;
// createPromoteMemoryToRegister - Provide an entry point to create this pass.
//
FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index c0de1938b2db3..a4e3029e3a5a7 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -228,14 +228,6 @@ namespace {
void run();
- /// properlyDominates - Return true if I1 properly dominates I2.
- ///
- bool properlyDominates(Instruction *I1, Instruction *I2) const {
- if (InvokeInst *II = dyn_cast<InvokeInst>(I1))
- I1 = II->getNormalDest()->begin();
- return DT.properlyDominates(I1->getParent(), I2->getParent());
- }
-
/// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
///
bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
@@ -896,11 +888,12 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
DIVar, SI);
// Propagate any debug metadata from the store onto the dbg.value.
- if (MDNode *SIMD = SI->getMetadata("dbg"))
- DbgVal->setMetadata("dbg", SIMD);
+ DebugLoc SIDL = SI->getDebugLoc();
+ if (!SIDL.isUnknown())
+ DbgVal->setDebugLoc(SIDL);
// Otherwise propagate debug metadata from dbg.declare.
- else if (MDNode *MD = DDI->getMetadata("dbg"))
- DbgVal->setMetadata("dbg", MD);
+ else
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
}
// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
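
The ConvertDebugDeclareToDebugValue hunk moves from raw "dbg" metadata to the DebugLoc interface while keeping the fallback order: prefer the store's location, else inherit the declare's. The pattern isolated, as a sketch (era API; the helper is invented):

    #include "llvm/Instruction.h"
    using namespace llvm;

    // Give NewI the most specific location available: Primary's if known,
    // otherwise Fallback's.
    static void inheritDebugLoc(Instruction *NewI, const Instruction *Primary,
                                const Instruction *Fallback) {
      DebugLoc DL = Primary->getDebugLoc();
      if (DL.isUnknown())
        DL = Fallback->getDebugLoc();
      NewI->setDebugLoc(DL);
    }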
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index f4bdb527655ab..c855988307ea7 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -29,20 +29,21 @@ static AvailableValsTy &getAvailableVals(void *AV) {
}
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {}
+ : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete &getAvailableVals(AV);
}
/// Initialize - Reset this object to get ready for a new set of SSA
-/// updates. ProtoValue is the value used to name PHI nodes.
-void SSAUpdater::Initialize(Value *ProtoValue) {
+/// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
+void SSAUpdater::Initialize(const Type *Ty, StringRef Name) {
if (AV == 0)
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
- PrototypeValue = ProtoValue;
+ ProtoType = Ty;
+ ProtoName = Name;
}
/// HasValueForBlock - Return true if the SSAUpdater already has a value for
@@ -54,8 +55,8 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
/// AddAvailableValue - Indicate that a rewritten value is available in the
/// specified block with the specified value.
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
- assert(PrototypeValue != 0 && "Need to initialize SSAUpdater");
- assert(PrototypeValue->getType() == V->getType() &&
+ assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType == V->getType() &&
"All rewritten values must have the same type");
getAvailableVals(AV)[BB] = V;
}
@@ -148,7 +149,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// If there are no predecessors, just return undef.
if (PredValues.empty())
- return UndefValue::get(PrototypeValue->getType());
+ return UndefValue::get(ProtoType);
// Otherwise, if all the merged values are the same, just use it.
if (SingularValue != 0)
@@ -168,9 +169,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
}
// Ok, we have no way out, insert a new one now.
- PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
- PrototypeValue->getName(),
- &BB->front());
+ PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front());
InsertedPHI->reserveOperandSpace(PredValues.size());
// Fill in all the predecessors of the PHI.
@@ -205,6 +204,22 @@ void SSAUpdater::RewriteUse(Use &U) {
U.set(V);
}
+/// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
+/// this version of the method can rewrite uses in the same block as a
+/// definition, because it assumes that all uses of a value are below any
+/// inserted values.
+void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
+ Instruction *User = cast<Instruction>(U.getUser());
+
+ Value *V;
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
+ else
+ V = GetValueAtEndOfBlock(User->getParent());
+
+ U.set(V);
+}
+
/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator
/// in the SSAUpdaterImpl template.
namespace {
@@ -266,15 +281,14 @@ public:
/// GetUndefVal - Get an undefined value of the same type as the value
/// being handled.
static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
- return UndefValue::get(Updater->PrototypeValue->getType());
+ return UndefValue::get(Updater->ProtoType);
}
/// CreateEmptyPHI - Create a new PHI instruction in the specified block.
/// Reserve space for the operands but do not fill them in yet.
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
SSAUpdater *Updater) {
- PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(),
- Updater->PrototypeValue->getName(),
+ PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName,
&BB->front());
PHI->reserveOperandSpace(NumPreds);
return PHI;
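
With PrototypeValue gone, clients seed SSAUpdater with a type and a name rather than a sample value; the LCSSA change above shows the conversion at a call site. A minimal usage sketch assembled from calls visible in this patch (the wrapper itself is invented):

    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/SSAUpdater.h"
    using namespace llvm;

    static void rewriteUseOf(Instruction *Def, Use &U) {
      SSAUpdater SSA;
      SSA.Initialize(Def->getType(), Def->getName());
      SSA.AddAvailableValue(Def->getParent(), Def);
      SSA.RewriteUse(U);   // inserts PHIs as needed, then rewrites U
    }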
diff --git a/lib/Transforms/Utils/SSI.cpp b/lib/Transforms/Utils/SSI.cpp
deleted file mode 100644
index 4e813ddf95c7d..0000000000000
--- a/lib/Transforms/Utils/SSI.cpp
+++ /dev/null
@@ -1,432 +0,0 @@
-//===------------------- SSI.cpp - Creates SSI Representation -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass converts a list of variables to the Static Single Information
-// form. This is a program representation described by Scott Ananian in his
-// Master Thesis: "The Static Single Information Form (1999)".
-// We are building an on-demand representation, that is, we do not convert
-// every single variable in the target function to SSI form. Rather, we receive
-// a list of target variables that must be converted. We also do not
-// completely convert a target variable to the SSI format. Instead, we only
-// change the variable in the points where new information can be attached
-// to its live range, that is, at branch points.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ssi"
-
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/SSI.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-
-using namespace llvm;
-
-static const std::string SSI_PHI = "SSI_phi";
-static const std::string SSI_SIG = "SSI_sigma";
-
-STATISTIC(NumSigmaInserted, "Number of sigma functions inserted");
-STATISTIC(NumPhiInserted, "Number of phi functions inserted");
-
-void SSI::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<DominanceFrontier>();
- AU.addRequiredTransitive<DominatorTree>();
- AU.setPreservesAll();
-}
-
-bool SSI::runOnFunction(Function &F) {
- DT_ = &getAnalysis<DominatorTree>();
- return false;
-}
-
-/// This methods creates the SSI representation for the list of values
-/// received. It will only create SSI representation if a value is used
-/// to decide a branch. Repeated values are created only once.
-///
-void SSI::createSSI(SmallVectorImpl<Instruction *> &value) {
- init(value);
-
- SmallPtrSet<Instruction*, 4> needConstruction;
- for (SmallVectorImpl<Instruction*>::iterator I = value.begin(),
- E = value.end(); I != E; ++I)
- if (created.insert(*I))
- needConstruction.insert(*I);
-
- insertSigmaFunctions(needConstruction);
-
- // Test if there is a need to transform to SSI
- if (!needConstruction.empty()) {
- insertPhiFunctions(needConstruction);
- renameInit(needConstruction);
- rename(DT_->getRoot());
- fixPhis();
- }
-
- clean();
-}
-
-/// Insert sigma functions (a sigma function is a phi function with one
-/// operator)
-///
-void SSI::insertSigmaFunctions(SmallPtrSet<Instruction*, 4> &value) {
- for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
- E = value.end(); I != E; ++I) {
- for (Value::use_iterator begin = (*I)->use_begin(),
- end = (*I)->use_end(); begin != end; ++begin) {
- // Test if the Use of the Value is in a comparator
- if (CmpInst *CI = dyn_cast<CmpInst>(begin)) {
- // Iterates through all uses of CmpInst
- for (Value::use_iterator begin_ci = CI->use_begin(),
- end_ci = CI->use_end(); begin_ci != end_ci; ++begin_ci) {
- // Test if any use of CmpInst is in a Terminator
- if (TerminatorInst *TI = dyn_cast<TerminatorInst>(begin_ci)) {
- insertSigma(TI, *I);
- }
- }
- }
- }
- }
-}
-
-/// Inserts Sigma Functions in every BasicBlock successor to Terminator
-/// Instruction TI. All inserted Sigma Function are related to Instruction I.
-///
-void SSI::insertSigma(TerminatorInst *TI, Instruction *I) {
- // Basic Block of the Terminator Instruction
- BasicBlock *BB = TI->getParent();
- for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
- // Next Basic Block
- BasicBlock *BB_next = TI->getSuccessor(i);
- if (BB_next != BB &&
- BB_next->getSinglePredecessor() != NULL &&
- dominateAny(BB_next, I)) {
- PHINode *PN = PHINode::Create(I->getType(), SSI_SIG, BB_next->begin());
- PN->addIncoming(I, BB);
- sigmas[PN] = I;
- created.insert(PN);
- defsites[I].push_back(BB_next);
- ++NumSigmaInserted;
- }
- }
-}
-
-/// Insert phi functions when necessary
-///
-void SSI::insertPhiFunctions(SmallPtrSet<Instruction*, 4> &value) {
- DominanceFrontier *DF = &getAnalysis<DominanceFrontier>();
- for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
- E = value.end(); I != E; ++I) {
- // Test if there were any sigmas for this variable
- SmallPtrSet<BasicBlock *, 16> BB_visited;
-
- // Insert phi functions if there is any sigma function
- while (!defsites[*I].empty()) {
-
- BasicBlock *BB = defsites[*I].back();
-
- defsites[*I].pop_back();
- DominanceFrontier::iterator DF_BB = DF->find(BB);
-
- // The BB is unreachable. Skip it.
- if (DF_BB == DF->end())
- continue;
-
- // Iterates through all the dominance frontier of BB
- for (std::set<BasicBlock *>::iterator DF_BB_begin =
- DF_BB->second.begin(), DF_BB_end = DF_BB->second.end();
- DF_BB_begin != DF_BB_end; ++DF_BB_begin) {
- BasicBlock *BB_dominated = *DF_BB_begin;
-
- // Test if has not yet visited this node and if the
- // original definition dominates this node
- if (BB_visited.insert(BB_dominated) &&
- DT_->properlyDominates(value_original[*I], BB_dominated) &&
- dominateAny(BB_dominated, *I)) {
- PHINode *PN = PHINode::Create(
- (*I)->getType(), SSI_PHI, BB_dominated->begin());
- phis.insert(std::make_pair(PN, *I));
- created.insert(PN);
-
- defsites[*I].push_back(BB_dominated);
- ++NumPhiInserted;
- }
- }
- }
- BB_visited.clear();
- }
-}
-
-/// Some initialization for the rename part
-///
-void SSI::renameInit(SmallPtrSet<Instruction*, 4> &value) {
- for (SmallPtrSet<Instruction*, 4>::iterator I = value.begin(),
- E = value.end(); I != E; ++I)
- value_stack[*I].push_back(*I);
-}
-
-/// Renames all variables in the specified BasicBlock.
-/// Only variables that need to be rename will be.
-///
-void SSI::rename(BasicBlock *BB) {
- SmallPtrSet<Instruction*, 8> defined;
-
- // Iterate through instructions and make appropriate renaming.
- // For SSI_PHI (b = PHI()), store b at value_stack as a new
- // definition of the variable it represents.
- // For SSI_SIG (b = PHI(a)), substitute a with the current
- // value of a, present in the value_stack.
- // Then store bin the value_stack as the new definition of a.
- // For all other instructions (b = OP(a, c, d, ...)), we need to substitute
- // all operands with its current value, present in value_stack.
- for (BasicBlock::iterator begin = BB->begin(), end = BB->end();
- begin != end; ++begin) {
- Instruction *I = begin;
- if (PHINode *PN = dyn_cast<PHINode>(I)) { // Treat PHI functions
- Instruction* position;
-
- // Treat SSI_PHI
- if ((position = getPositionPhi(PN))) {
- value_stack[position].push_back(PN);
- defined.insert(position);
- // Treat SSI_SIG
- } else if ((position = getPositionSigma(PN))) {
- substituteUse(I);
- value_stack[position].push_back(PN);
- defined.insert(position);
- }
-
- // Treat all other PHI functions
- else {
- substituteUse(I);
- }
- }
-
- // Treat all other functions
- else {
- substituteUse(I);
- }
- }
-
- // This loop iterates in all BasicBlocks that are successors of the current
- // BasicBlock. For each SSI_PHI instruction found, insert an operand.
- // This operand is the current operand in value_stack for the variable
- // in "position". And the BasicBlock this operand represents is the current
- // BasicBlock.
- for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) {
- BasicBlock *BB_succ = *SI;
-
- for (BasicBlock::iterator begin = BB_succ->begin(),
- notPhi = BB_succ->getFirstNonPHI(); begin != *notPhi; ++begin) {
- Instruction *I = begin;
- PHINode *PN = dyn_cast<PHINode>(I);
- Instruction* position;
- if (PN && ((position = getPositionPhi(PN)))) {
- PN->addIncoming(value_stack[position].back(), BB);
- }
- }
- }
-
- // This loop calls rename on all children from this block. This time children
- // refers to a successor block in the dominance tree.
- DomTreeNode *DTN = DT_->getNode(BB);
- for (DomTreeNode::iterator begin = DTN->begin(), end = DTN->end();
- begin != end; ++begin) {
- DomTreeNodeBase<BasicBlock> *DTN_children = *begin;
- BasicBlock *BB_children = DTN_children->getBlock();
- rename(BB_children);
- }
-
- // Now we remove all inserted definitions of a variable from the top of
- // the stack leaving the previous one as the top.
- for (SmallPtrSet<Instruction*, 8>::iterator DI = defined.begin(),
- DE = defined.end(); DI != DE; ++DI)
- value_stack[*DI].pop_back();
-}
-
-/// Substitute any use in this instruction for the last definition of
-/// the variable
-///
-void SSI::substituteUse(Instruction *I) {
- for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) {
- Value *operand = I->getOperand(i);
- for (DenseMap<Instruction*, SmallVector<Instruction*, 1> >::iterator
- VI = value_stack.begin(), VE = value_stack.end(); VI != VE; ++VI) {
- if (operand == VI->second.front() &&
- I != VI->second.back()) {
- PHINode *PN_I = dyn_cast<PHINode>(I);
- PHINode *PN_vs = dyn_cast<PHINode>(VI->second.back());
-
- // If a phi created in a BasicBlock is used as an operand of another
- // created in the same BasicBlock, this step marks this second phi,
- // to fix this issue later. It cannot be fixed now, because the
- // operands of the first phi are not final yet.
- if (PN_I && PN_vs &&
- VI->second.back()->getParent() == I->getParent()) {
-
- phisToFix.insert(PN_I);
- }
-
- I->setOperand(i, VI->second.back());
- break;
- }
- }
- }
-}
-
-/// Test if the BasicBlock BB dominates any use or definition of value.
-/// If it dominates a phi instruction that is on the same BasicBlock,
-/// that does not count.
-///
-bool SSI::dominateAny(BasicBlock *BB, Instruction *value) {
- for (Value::use_iterator begin = value->use_begin(),
- end = value->use_end(); begin != end; ++begin) {
- Instruction *I = cast<Instruction>(*begin);
- BasicBlock *BB_father = I->getParent();
- if (BB == BB_father && isa<PHINode>(I))
- continue;
- if (DT_->dominates(BB, BB_father)) {
- return true;
- }
- }
- return false;
-}
-
-/// When there is a phi node that is created in a BasicBlock and it is used
-/// as an operand of another phi function used in the same BasicBlock,
-/// LLVM looks this as an error. So on the second phi, the first phi is called
-/// P and the BasicBlock it incomes is B. This P will be replaced by the value
-/// it has for BasicBlock B. It also includes undef values for predecessors
-/// that were not included in the phi.
-///
-void SSI::fixPhis() {
- for (SmallPtrSet<PHINode *, 1>::iterator begin = phisToFix.begin(),
- end = phisToFix.end(); begin != end; ++begin) {
- PHINode *PN = *begin;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
- PHINode *PN_father = dyn_cast<PHINode>(PN->getIncomingValue(i));
- if (PN_father && PN->getParent() == PN_father->getParent() &&
- !DT_->dominates(PN->getParent(), PN->getIncomingBlock(i))) {
- BasicBlock *BB = PN->getIncomingBlock(i);
- int pos = PN_father->getBasicBlockIndex(BB);
- PN->setIncomingValue(i, PN_father->getIncomingValue(pos));
- }
- }
- }
-
- for (DenseMapIterator<PHINode *, Instruction*> begin = phis.begin(),
- end = phis.end(); begin != end; ++begin) {
- PHINode *PN = begin->first;
- BasicBlock *BB = PN->getParent();
- pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- SmallVector<BasicBlock*, 8> Preds(PI, PE);
- for (unsigned size = Preds.size();
- PI != PE && PN->getNumIncomingValues() != size; ++PI) {
- bool found = false;
- for (unsigned i = 0, pn_end = PN->getNumIncomingValues();
- i < pn_end; ++i) {
- if (PN->getIncomingBlock(i) == *PI) {
- found = true;
- break;
- }
- }
- if (!found) {
- PN->addIncoming(UndefValue::get(PN->getType()), *PI);
- }
- }
- }
-}
-
-/// Return which variable (position on the vector of variables) this phi
-/// represents on the phis list.
-///
-Instruction* SSI::getPositionPhi(PHINode *PN) {
- DenseMap<PHINode *, Instruction*>::iterator val = phis.find(PN);
- if (val == phis.end())
- return 0;
- else
- return val->second;
-}
-
-/// Return which variable (position on the vector of variables) this phi
-/// represents on the sigmas list.
-///
-Instruction* SSI::getPositionSigma(PHINode *PN) {
- DenseMap<PHINode *, Instruction*>::iterator val = sigmas.find(PN);
- if (val == sigmas.end())
- return 0;
- else
- return val->second;
-}
-
-/// Initializes
-///
-void SSI::init(SmallVectorImpl<Instruction *> &value) {
- for (SmallVectorImpl<Instruction *>::iterator I = value.begin(),
- E = value.end(); I != E; ++I) {
- value_original[*I] = (*I)->getParent();
- defsites[*I].push_back((*I)->getParent());
- }
-}
-
-/// Clean all used resources in this creation of SSI
-///
-void SSI::clean() {
- phis.clear();
- sigmas.clear();
- phisToFix.clear();
-
- defsites.clear();
- value_stack.clear();
- value_original.clear();
-}
-
-/// createSSIPass - The public interface to this file...
-///
-FunctionPass *llvm::createSSIPass() { return new SSI(); }
-
-char SSI::ID = 0;
-static RegisterPass<SSI> X("ssi", "Static Single Information Construction");
-
-/// SSIEverything - A pass that runs createSSI on every non-void variable,
-/// intended for debugging.
-namespace {
- struct SSIEverything : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- SSIEverything() : FunctionPass(&ID) {}
-
- bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<SSI>();
- }
- };
-}
-
-bool SSIEverything::runOnFunction(Function &F) {
- SmallVector<Instruction *, 16> Insts;
- SSI &ssi = getAnalysis<SSI>();
-
- if (F.isDeclaration() || F.isIntrinsic()) return false;
-
- for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B)
- for (BasicBlock::iterator I = B->begin(), E = B->end(); I != E; ++I)
- if (!I->getType()->isVoidTy())
- Insts.push_back(I);
-
- ssi.createSSI(Insts);
- return true;
-}
-
-/// createSSIEverythingPass - The public interface to this file...
-///
-FunctionPass *llvm::createSSIEverythingPass() { return new SSIEverything(); }
-
-char SSIEverything::ID = 0;
-static RegisterPass<SSIEverything>
-Y("ssi-everything", "Static Single Information Construction");
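
For the record, the pass deleted above implemented Ananian's SSI form on demand: at each branch that tests a variable it split the live range with a one-operand "sigma" phi, so later analyses could attach the branch-derived fact to the new name. Source-level intuition only (ordinary C++, not pass output):

    int clamp_sign(int x) {
      if (x < 0) {
        // SSI: x_neg = sigma(x); on this path x is known to be < 0.
        return -x;
      }
      // SSI: x_pos = sigma(x); on this path x is known to be >= 0.
      return x;
    }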
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 27b07d9731a58..28d7afbf1c33e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -949,7 +949,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
UI != E; ++UI) {
// Ignore any user that is not a PHI node in BB2. These can only occur in
// unreachable blocks, because they would not be dominated by the instr.
- PHINode *PN = dyn_cast<PHINode>(UI);
+ PHINode *PN = dyn_cast<PHINode>(*UI);
if (!PN || PN->getParent() != BB2)
return false;
PHIUses.push_back(PN);
@@ -1724,12 +1724,12 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
assert(BB && BB->getParent() && "Block not embedded in function!");
assert(BB->getTerminator() && "Degenerate basic block encountered!");
- assert(&BB->getParent()->getEntryBlock() != BB &&
- "Can't Simplify entry block!");
- // Remove basic blocks that have no predecessors... or that just have themself
- // as a predecessor. These are unreachable.
- if (pred_begin(BB) == pred_end(BB) || BB->getSinglePredecessor() == BB) {
+ // Remove basic blocks that have no predecessors (except the entry block)...
+  // or that just have themselves as a predecessor. These are unreachable.
+ if ((pred_begin(BB) == pred_end(BB) &&
+ &BB->getParent()->getEntryBlock() != BB) ||
+ BB->getSinglePredecessor() == BB) {
DEBUG(dbgs() << "Removing BB: \n" << *BB);
DeleteDeadBlock(BB);
return true;
@@ -1880,8 +1880,9 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
while (isa<DbgInfoIntrinsic>(BBI))
++BBI;
if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
- return true;
+ if (BB != &BB->getParent()->getEntryBlock())
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ return true;
} else { // Conditional branch
if (isValueEqualityComparison(BI)) {
@@ -2049,12 +2050,38 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
}
// If this block is now dead, remove it.
- if (pred_begin(BB) == pred_end(BB)) {
+ if (pred_begin(BB) == pred_end(BB) &&
+ BB != &BB->getParent()->getEntryBlock()) {
// We know there are no successors, so just nuke the block.
M->getBasicBlockList().erase(BB);
return true;
}
}
+ } else if (IndirectBrInst *IBI =
+ dyn_cast<IndirectBrInst>(BB->getTerminator())) {
+ // Eliminate redundant destinations.
+ SmallPtrSet<Value *, 8> Succs;
+ for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
+ BasicBlock *Dest = IBI->getDestination(i);
+ if (!Dest->hasAddressTaken() || !Succs.insert(Dest)) {
+ Dest->removePredecessor(BB);
+ IBI->removeDestination(i);
+ --i; --e;
+ Changed = true;
+ }
+ }
+
+ if (IBI->getNumDestinations() == 0) {
+ // If the indirectbr has no successors, change it to unreachable.
+ new UnreachableInst(IBI->getContext(), IBI);
+ IBI->eraseFromParent();
+ Changed = true;
+ } else if (IBI->getNumDestinations() == 1) {
+ // If the indirectbr has one successor, change it to a direct branch.
+ BranchInst::Create(IBI->getDestination(0), IBI);
+ IBI->eraseFromParent();
+ Changed = true;
+ }
}
// Merge basic blocks into their predecessor if there is only one distinct
@@ -2068,12 +2095,15 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
// is a conditional branch, see if we can hoist any code from this block up
// into our predecessor.
pred_iterator PI(pred_begin(BB)), PE(pred_end(BB));
- BasicBlock *OnlyPred = *PI++;
- for (; PI != PE; ++PI) // Search all predecessors, see if they are all same
- if (*PI != OnlyPred) {
+ BasicBlock *OnlyPred = 0;
+ for (; PI != PE; ++PI) { // Search all predecessors, see if they are all same
+ if (!OnlyPred)
+ OnlyPred = *PI;
+ else if (*PI != OnlyPred) {
OnlyPred = 0; // There are multiple different predecessors...
break;
}
+ }
if (OnlyPred)
if (BranchInst *BI = dyn_cast<BranchInst>(OnlyPred->getTerminator()))
@@ -2172,8 +2202,6 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
/// eliminates unreachable basic blocks, and does other "peephole" optimization
/// of the CFG. It returns true if a modification was made.
///
-/// WARNING: The entry node of a function may not be simplified.
-///
bool llvm::SimplifyCFG(BasicBlock *BB, const TargetData *TD) {
return SimplifyCFGOpt(TD).run(BB);
}
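
The new indirectbr handling uses the classic erase-in-place loop: when destination i is deleted, both the index and the end bound step back so the element that slid into slot i is re-examined. The same idiom over a plain vector, with a set doing the duplicate detection, as a standalone sketch:

    #include <set>
    #include <vector>

    // Drop duplicate entries in place, keeping first occurrences.
    void uniqueDestinations(std::vector<int> &dests) {
      std::set<int> seen;
      for (size_t i = 0, e = dests.size(); i != e; ++i) {
        if (!seen.insert(dests[i]).second) {   // already present: remove
          dests.erase(dests.begin() + i);
          --i; --e;   // re-examine the slot that shifted into position i
        }
      }
    }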
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 3fa8b70a8505a..a51f1e1a47f65 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -24,8 +24,8 @@
using namespace llvm;
char UnifyFunctionExitNodes::ID = 0;
-static RegisterPass<UnifyFunctionExitNodes>
-X("mergereturn", "Unify function exit nodes");
+INITIALIZE_PASS(UnifyFunctionExitNodes, "mergereturn",
+ "Unify function exit nodes", false, false);
Pass *llvm::createUnifyFunctionExitNodesPass() {
return new UnifyFunctionExitNodes();
@@ -35,7 +35,7 @@ void UnifyFunctionExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
// This is a cluster of orthogonal Transforms
- AU.addPreservedID(PromoteMemoryToRegisterID);
+ AU.addPreserved("mem2reg");
AU.addPreservedID(LowerSwitchID);
}
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 3f6a90c94ebb6..fc4bde77d4f95 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ValueMapper.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -20,28 +20,51 @@
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
+ bool ModuleLevelChanges) {
Value *&VMSlot = VM[V];
if (VMSlot) return VMSlot; // Does it exist in the map yet?
// NOTE: VMSlot can be invalidated by any reference to VM, which can grow the
// DenseMap. This includes any recursive calls to MapValue.
- // Global values and non-function-local metadata do not need to be seeded into
- // the VM if they are using the identity mapping.
+ // Global values do not need to be seeded into the VM if they
+ // are using the identity mapping.
if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
- (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal()))
+ (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal() &&
+ !ModuleLevelChanges))
return VMSlot = const_cast<Value*>(V);
if (const MDNode *MD = dyn_cast<MDNode>(V)) {
- SmallVector<Value*, 4> Elts;
- for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
- Elts.push_back(MD->getOperand(i) ? MapValue(MD->getOperand(i), VM) : 0);
- return VM[V] = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+ // Start by assuming that we'll use the identity mapping.
+ VMSlot = const_cast<Value*>(V);
+
+ // Check all operands to see if any need to be remapped.
+ for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
+ Value *OP = MD->getOperand(i);
+ if (!OP || MapValue(OP, VM, ModuleLevelChanges) == OP) continue;
+
+ // Ok, at least one operand needs remapping.
+ MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0);
+ VM[V] = Dummy;
+ SmallVector<Value*, 4> Elts;
+ Elts.reserve(MD->getNumOperands());
+ for (i = 0; i != e; ++i)
+ Elts.push_back(MD->getOperand(i) ?
+ MapValue(MD->getOperand(i), VM, ModuleLevelChanges) : 0);
+ MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size());
+ Dummy->replaceAllUsesWith(NewMD);
+ MDNode::deleteTemporary(Dummy);
+ return VM[V] = NewMD;
+ }
+
+ // No operands needed remapping; keep the identity map.
+ return const_cast<Value*>(V);
}
Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
- if (C == 0) return 0;
+ if (C == 0)
+ return 0;
if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
@@ -51,7 +74,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
for (User::op_iterator b = CA->op_begin(), i = b, e = CA->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
if (MV != *i) {
// This array must contain a reference to a global, make a new array
// and return it.
@@ -62,7 +85,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
return VM[V] = ConstantArray::get(CA->getType(), Values);
}
}
@@ -72,7 +96,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C)) {
for (User::op_iterator b = CS->op_begin(), i = b, e = CS->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
if (MV != *i) {
// This struct must contain a reference to a global, make a new struct
// and return it.
@@ -83,7 +107,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
return VM[V] = ConstantStruct::get(CS->getType(), Values);
}
}
@@ -93,14 +118,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
std::vector<Constant*> Ops;
for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(*i, VM)));
+ Ops.push_back(cast<Constant>(MapValue(*i, VM, ModuleLevelChanges)));
return VM[V] = CE->getWithOperands(Ops);
}
if (ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
for (User::op_iterator b = CV->op_begin(), i = b, e = CV->op_end();
i != e; ++i) {
- Value *MV = MapValue(*i, VM);
+ Value *MV = MapValue(*i, VM, ModuleLevelChanges);
if (MV != *i) {
// This vector value must contain a reference to a global, make a new
// vector constant and return it.
@@ -111,7 +136,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
Values.push_back(cast<Constant>(*j));
Values.push_back(cast<Constant>(MV));
for (++i; i != e; ++i)
- Values.push_back(cast<Constant>(MapValue(*i, VM)));
+ Values.push_back(cast<Constant>(MapValue(*i, VM,
+ ModuleLevelChanges)));
return VM[V] = ConstantVector::get(Values);
}
}
@@ -119,19 +145,33 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
}
BlockAddress *BA = cast<BlockAddress>(C);
- Function *F = cast<Function>(MapValue(BA->getFunction(), VM));
- BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM));
+ Function *F = cast<Function>(MapValue(BA->getFunction(), VM,
+ ModuleLevelChanges));
+ BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(),VM,
+ ModuleLevelChanges));
return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
///
-void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
+ bool ModuleLevelChanges) {
+ // Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap);
+ Value *V = MapValue(*op, VMap, ModuleLevelChanges);
assert(V && "Referenced value not in value map!");
*op = V;
}
-}
+ // Remap attached metadata.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
+ I->getAllMetadata(MDs);
+ for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
+ MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
+ Value *Old = MI->second;
+ Value *New = MapValue(Old, VMap, ModuleLevelChanges);
+ if (New != Old)
+ I->setMetadata(MI->first, cast<MDNode>(New));
+ }
+}
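
The MDNode path above has to survive cyclic metadata: a temporary node is seeded into the map before any operand is remapped, so a cycle terminates at the placeholder, and replaceAllUsesWith then splices in the finished node (MDNode is immutable, hence the temporary/RAUW dance). The same placeholder idea in miniature, for a mutable and hypothetical graph type:

    #include <map>
    #include <vector>

    struct Node { std::vector<Node*> Ops; };

    // Clone a possibly-cyclic graph. Seeding VM before visiting operands
    // makes a cycle terminate at the placeholder instead of recursing
    // forever; a mutable node can simply be filled in afterwards, where
    // the immutable MDNode needs getTemporary + replaceAllUsesWith.
    // (Ownership of the clones is elided.)
    static Node *MapNode(Node *N, std::map<Node*, Node*> &VM) {
      std::map<Node*, Node*>::iterator I = VM.find(N);
      if (I != VM.end())
        return I->second;        // Already cloned, or clone in progress.
      Node *Clone = new Node();  // Placeholder, analogous to getTemporary.
      VM[N] = Clone;
      for (std::vector<Node*>::iterator OI = N->Ops.begin(),
           OE = N->Ops.end(); OI != OE; ++OI)
        Clone->Ops.push_back(MapNode(*OI, VM));
      return Clone;
    }
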
diff --git a/lib/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h
deleted file mode 100644
index f4ff643ca03ed..0000000000000
--- a/lib/Transforms/Utils/ValueMapper.h
+++ /dev/null
@@ -1,29 +0,0 @@
-//===- ValueMapper.h - Interface shared by lib/Transforms/Utils -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the MapValue interface which is used by various parts of
-// the Transforms/Utils library to implement cloning and linking facilities.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef VALUEMAPPER_H
-#define VALUEMAPPER_H
-
-#include "llvm/ADT/ValueMap.h"
-
-namespace llvm {
- class Value;
- class Instruction;
- typedef ValueMap<const Value *, Value *> ValueToValueMapTy;
-
- Value *MapValue(const Value *V, ValueToValueMapTy &VM);
- void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);
-} // End llvm namespace
-
-#endif
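
With the header promoted from lib/Transforms/Utils into include/llvm/Transforms/Utils, clients outside this library can remap cloned code through the public interface. A small usage sketch, assuming VMap has already been seeded with an entry for every value the cloned block references (RemapInstruction asserts on operands missing from the map):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instruction.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"
    using namespace llvm;

    static void RemapClonedBlock(BasicBlock *Clone, ValueToValueMapTy &VMap) {
      for (BasicBlock::iterator I = Clone->begin(), E = Clone->end();
           I != E; ++I)
        RemapInstruction(&*I, VMap, /*ModuleLevelChanges=*/false);
    }
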
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 09b8aa507d833..831a9960463d1 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -16,7 +16,7 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Assembly/PrintModulePass.h"
-#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#include "llvm/LLVMContext.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
@@ -63,8 +63,6 @@ static const Module *getModuleFromVal(const Value *V) {
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return GV->getParent();
- if (const NamedMDNode *NMD = dyn_cast<NamedMDNode>(V))
- return NMD->getParent();
return 0;
}
@@ -230,7 +228,7 @@ void TypePrinting::CalcTypeName(const Type *Ty,
E = STy->element_end(); I != E; ++I) {
OS << ' ';
CalcTypeName(*I, TypeStack, OS);
- if (next(I) == STy->element_end())
+ if (llvm::next(I) == STy->element_end())
OS << ' ';
else
OS << ',';
@@ -240,21 +238,6 @@ void TypePrinting::CalcTypeName(const Type *Ty,
OS << '>';
break;
}
- case Type::UnionTyID: {
- const UnionType *UTy = cast<UnionType>(Ty);
- OS << "union {";
- for (StructType::element_iterator I = UTy->element_begin(),
- E = UTy->element_end(); I != E; ++I) {
- OS << ' ';
- CalcTypeName(*I, TypeStack, OS);
- if (next(I) == UTy->element_end())
- OS << ' ';
- else
- OS << ',';
- }
- OS << '}';
- break;
- }
case Type::PointerTyID: {
const PointerType *PTy = cast<PointerType>(Ty);
CalcTypeName(PTy->getElementType(), TypeStack, OS);
@@ -581,8 +564,12 @@ static SlotTracker *createSlotTracker(const Value *V) {
if (const Function *Func = dyn_cast<Function>(V))
return new SlotTracker(Func);
- if (isa<MDNode>(V))
+ if (const MDNode *MD = dyn_cast<MDNode>(V)) {
+ if (!MD->isFunctionLocal())
+ return new SlotTracker(MD->getFunction());
+
return new SlotTracker((Function *)0);
+ }
return 0;
}
@@ -634,10 +621,8 @@ void SlotTracker::processModule() {
I = TheModule->named_metadata_begin(),
E = TheModule->named_metadata_end(); I != E; ++I) {
const NamedMDNode *NMD = I;
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- if (MDNode *MD = NMD->getOperand(i))
- CreateMetadataSlot(MD);
- }
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ CreateMetadataSlot(NMD->getOperand(i));
}
// Add all the unnamed functions to the table.
@@ -778,15 +763,14 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) {
  // Don't insert if N is function-local metadata; such nodes are always
  // printed inline.
- if (N->isFunctionLocal())
- return;
-
- mdn_iterator I = mdnMap.find(N);
- if (I != mdnMap.end())
- return;
+ if (!N->isFunctionLocal()) {
+ mdn_iterator I = mdnMap.find(N);
+ if (I != mdnMap.end())
+ return;
- unsigned DestSlot = mdnNext++;
- mdnMap[N] = DestSlot;
+ unsigned DestSlot = mdnNext++;
+ mdnMap[N] = DestSlot;
+ }
// Recursively add any MDNodes referenced by operands.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
@@ -800,7 +784,8 @@ void SlotTracker::CreateMetadataSlot(const MDNode *N) {
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
TypePrinting *TypePrinter,
- SlotTracker *Machine);
+ SlotTracker *Machine,
+ const Module *Context);
@@ -856,7 +841,8 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
TypePrinting &TypePrinter,
- SlotTracker *Machine) {
+ SlotTracker *Machine,
+ const Module *Context) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
if (CI->getType()->isIntegerTy(1)) {
Out << (CI->getZExtValue() ? "true" : "false");
@@ -972,9 +958,11 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
Out << "blockaddress(";
- WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
+ Context);
Out << ", ";
- WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
+ Context);
Out << ")";
return;
}
@@ -994,12 +982,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
TypePrinter.print(ETy, Out);
Out << ' ';
WriteAsOperandInternal(Out, CA->getOperand(0),
- &TypePrinter, Machine);
+ &TypePrinter, Machine,
+ Context);
for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
Out << ", ";
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+ Context);
}
}
Out << ']';
@@ -1017,14 +1007,16 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
TypePrinter.print(CS->getOperand(0)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
+ Context);
for (unsigned i = 1; i < N; i++) {
Out << ", ";
TypePrinter.print(CS->getOperand(i)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
+ Context);
}
Out << ' ';
}
@@ -1035,15 +1027,6 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
return;
}
- if (const ConstantUnion *CU = dyn_cast<ConstantUnion>(CV)) {
- Out << "{ ";
- TypePrinter.print(CU->getOperand(0)->getType(), Out);
- Out << ' ';
- WriteAsOperandInternal(Out, CU->getOperand(0), &TypePrinter, Machine);
- Out << " }";
- return;
- }
-
if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
const Type *ETy = CP->getType()->getElementType();
assert(CP->getNumOperands() > 0 &&
@@ -1051,12 +1034,14 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << '<';
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CP->getOperand(0), &TypePrinter, Machine,
+ Context);
for (unsigned i = 1, e = CP->getNumOperands(); i != e; ++i) {
Out << ", ";
TypePrinter.print(ETy, Out);
Out << ' ';
- WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, CP->getOperand(i), &TypePrinter, Machine,
+ Context);
}
Out << '>';
return;
@@ -1087,7 +1072,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
TypePrinter.print((*OI)->getType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine);
+ WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
if (OI+1 != CE->op_end())
Out << ", ";
}
@@ -1112,7 +1097,8 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
TypePrinting *TypePrinter,
- SlotTracker *Machine) {
+ SlotTracker *Machine,
+ const Module *Context) {
Out << "!{";
for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
const Value *V = Node->getOperand(mi);
@@ -1122,7 +1108,7 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
TypePrinter->print(V->getType(), Out);
Out << ' ';
WriteAsOperandInternal(Out, Node->getOperand(mi),
- TypePrinter, Machine);
+ TypePrinter, Machine, Context);
}
if (mi + 1 != me)
Out << ", ";
@@ -1138,7 +1124,8 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
///
static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
TypePrinting *TypePrinter,
- SlotTracker *Machine) {
+ SlotTracker *Machine,
+ const Module *Context) {
if (V->hasName()) {
PrintLLVMName(Out, V);
return;
@@ -1147,7 +1134,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
const Constant *CV = dyn_cast<Constant>(V);
if (CV && !isa<GlobalValue>(CV)) {
assert(TypePrinter && "Constants require TypePrinting!");
- WriteConstantInternal(Out, CV, *TypePrinter, Machine);
+ WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
return;
}
@@ -1168,12 +1155,16 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
if (const MDNode *N = dyn_cast<MDNode>(V)) {
if (N->isFunctionLocal()) {
// Print metadata inline, not via slot reference number.
- WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine);
+ WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
return;
}
- if (!Machine)
- Machine = createSlotTracker(V);
+ if (!Machine) {
+ if (N->isFunctionLocal())
+ Machine = new SlotTracker(N->getFunction());
+ else
+ Machine = new SlotTracker(Context);
+ }
Out << '!' << Machine->getMetadataSlot(N);
return;
}
@@ -1227,8 +1218,9 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
// Fast path: Don't construct and populate a TypePrinting object if we
// won't be needing any types printed.
if (!PrintType &&
- (!isa<Constant>(V) || V->hasName() || isa<GlobalValue>(V))) {
- WriteAsOperandInternal(Out, V, 0, 0);
+ ((!isa<Constant>(V) && !isa<MDNode>(V)) ||
+ V->hasName() || isa<GlobalValue>(V))) {
+ WriteAsOperandInternal(Out, V, 0, 0, Context);
return;
}
@@ -1242,7 +1234,7 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
Out << ' ';
}
- WriteAsOperandInternal(Out, V, &TypePrinter, 0);
+ WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context);
}
namespace {
@@ -1297,7 +1289,7 @@ void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
TypePrinter.print(Operand->getType(), Out);
Out << ' ';
}
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
}
void AssemblyWriter::writeParamOperand(const Value *Operand,
@@ -1314,7 +1306,7 @@ void AssemblyWriter::writeParamOperand(const Value *Operand,
Out << ' ' << Attribute::getAsString(Attrs);
Out << ' ';
// Print the operand
- WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
}
void AssemblyWriter::printModule(const Module *M) {
@@ -1403,10 +1395,7 @@ void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
Out << "!" << NMD->getName() << " = !{";
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
if (i) Out << ", ";
- if (MDNode *MD = NMD->getOperand(i))
- Out << '!' << Machine.getMetadataSlot(MD);
- else
- Out << "null";
+ Out << '!' << Machine.getMetadataSlot(NMD->getOperand(i));
}
Out << "}\n";
}
@@ -1421,6 +1410,9 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
case GlobalValue::LinkerPrivateWeakLinkage:
Out << "linker_private_weak ";
break;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ Out << "linker_private_weak_def_auto ";
+ break;
case GlobalValue::InternalLinkage: Out << "internal "; break;
case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
@@ -1451,7 +1443,7 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
if (GV->isMaterializable())
Out << "; Materializable\n";
- WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent());
Out << " = ";
if (!GV->hasInitializer() && GV->hasExternalLinkage())
@@ -1510,7 +1502,7 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
TypePrinter.print(F->getFunctionType(), Out);
Out << "* ";
- WriteAsOperandInternal(Out, F, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
} else if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(Aliasee)) {
TypePrinter.print(GA->getType(), Out);
Out << ' ';
@@ -1593,7 +1585,7 @@ void AssemblyWriter::printFunction(const Function *F) {
Out << Attribute::getAsString(Attrs.getRetAttributes()) << ' ';
TypePrinter.print(F->getReturnType(), Out);
Out << ' ';
- WriteAsOperandInternal(Out, F, &TypePrinter, &Machine);
+ WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent());
Out << '(';
Machine.incorporateFunction(F);
@@ -1643,11 +1635,10 @@ void AssemblyWriter::printFunction(const Function *F) {
if (F->hasGC())
Out << " gc \"" << F->getGC() << '"';
if (F->isDeclaration()) {
- Out << "\n";
+ Out << '\n';
} else {
Out << " {";
-
- // Output all of its basic blocks... for the function
+ // Output all of the function's basic blocks.
for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I)
printBasicBlock(I);
@@ -1696,7 +1687,7 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
Out.PadToColumn(50);
Out << "; Error: Block without parent!";
} else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
- // Output predecessors for the block...
+ // Output predecessors for the block.
Out.PadToColumn(50);
Out << ";";
const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
@@ -1734,13 +1725,6 @@ void AssemblyWriter::printInfoComment(const Value &V) {
AnnotationWriter->printInfoComment(V, Out);
return;
}
-
- if (V.getType()->isVoidTy()) return;
-
- Out.PadToColumn(50);
- Out << "; <";
- TypePrinter.print(V.getType(), Out);
- Out << "> [#uses=" << V.getNumUses() << ']'; // Output # uses
}
// This member is called for each Instruction in a function.
@@ -2029,7 +2013,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
} else {
Out << ", !<unknown kind #" << Kind << ">";
}
- Out << " !" << Machine.getMetadataSlot(InstMD[i].second);
+ Out << ' ';
+ WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine,
+ TheModule);
}
}
printInfoComment(I);
@@ -2077,7 +2063,7 @@ void AssemblyWriter::writeAllMDNodes() {
}
void AssemblyWriter::printMDNodeBody(const MDNode *Node) {
- WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine);
+ WriteMDNodeBodyInternal(Out, Node, &TypePrinter, &Machine, TheModule);
WriteMDNodeComment(Node, Out);
Out << "\n";
}
@@ -2093,6 +2079,13 @@ void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
W.printModule(this);
}
+void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+ SlotTracker SlotTable(getParent());
+ formatted_raw_ostream OS(ROS);
+ AssemblyWriter W(OS, SlotTable, getParent(), AAW);
+ W.printNamedMDNode(this);
+}
+
void Type::print(raw_ostream &OS) const {
if (this == 0) {
OS << "<null Type>";
@@ -2130,15 +2123,11 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
SlotTracker SlotTable(F);
AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
W.printMDNodeBody(N);
- } else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) {
- SlotTracker SlotTable(N->getParent());
- AssemblyWriter W(OS, SlotTable, N->getParent(), AAW);
- W.printNamedMDNode(N);
} else if (const Constant *C = dyn_cast<Constant>(this)) {
TypePrinting TypePrinter;
TypePrinter.print(C->getType(), OS);
OS << ' ';
- WriteConstantInternal(OS, C, TypePrinter, 0);
+ WriteConstantInternal(OS, C, TypePrinter, 0, 0);
} else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
isa<Argument>(this)) {
WriteAsOperand(OS, this, true, 0);
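
The const Module *Context parameter now threaded through the writer exists so that a bare operand print can still number module-level metadata: an MDNode operand has no meaningful !N slot without a module to enumerate. Callers reach this through the existing final parameter of WriteAsOperand; a usage sketch (hypothetical helper, signature per llvm/Assembly/Writer.h):

    #include "llvm/Assembly/Writer.h"
    #include "llvm/Instruction.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void PrintOperands(const Instruction &I, const Module *M) {
      for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
        // Passing M lets metadata operands print as !N slot references.
        WriteAsOperand(errs(), I.getOperand(i), /*PrintType=*/true, M);
        errs() << '\n';
      }
    }
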
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index dc39024e39456..9330e141c3412 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -78,6 +78,63 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = F;
return true;
}
+ } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
+ if (((Name.compare(14, 5, "vmovl", 5) == 0 ||
+ Name.compare(14, 5, "vaddl", 5) == 0 ||
+ Name.compare(14, 5, "vsubl", 5) == 0 ||
+ Name.compare(14, 5, "vaddw", 5) == 0 ||
+ Name.compare(14, 5, "vsubw", 5) == 0 ||
+ Name.compare(14, 5, "vmull", 5) == 0 ||
+ Name.compare(14, 5, "vmlal", 5) == 0 ||
+ Name.compare(14, 5, "vmlsl", 5) == 0 ||
+ Name.compare(14, 5, "vabdl", 5) == 0 ||
+ Name.compare(14, 5, "vabal", 5) == 0) &&
+ (Name.compare(19, 2, "s.", 2) == 0 ||
+ Name.compare(19, 2, "u.", 2) == 0)) ||
+
+ (Name.compare(14, 4, "vaba", 4) == 0 &&
+ (Name.compare(18, 2, "s.", 2) == 0 ||
+ Name.compare(18, 2, "u.", 2) == 0)) ||
+
+ (Name.compare(14, 6, "vmovn.", 6) == 0)) {
+
+ // Calls to these are transformed into IR without intrinsics.
+ NewFn = 0;
+ return true;
+ }
+ // Old versions of NEON ld/st intrinsics are missing alignment arguments.
+ bool isVLd = (Name.compare(14, 3, "vld", 3) == 0);
+ bool isVSt = (Name.compare(14, 3, "vst", 3) == 0);
+ if (isVLd || isVSt) {
+ unsigned NumVecs = Name.at(17) - '0';
+ if (NumVecs == 0 || NumVecs > 4)
+ return false;
+ bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0);
+ if (!isLaneOp && Name.at(18) != '.')
+ return false;
+ unsigned ExpectedArgs = 2; // for the address and alignment
+ if (isVSt || isLaneOp)
+ ExpectedArgs += NumVecs;
+ if (isLaneOp)
+ ExpectedArgs += 1; // for the lane number
+ unsigned NumP = FTy->getNumParams();
+ if (NumP != ExpectedArgs - 1)
+ return false;
+
+ // Change the name of the old (bad) intrinsic, because
+ // its type is incorrect, but we cannot overload that name.
+ F->setName("");
+
+ // One argument is missing: add the alignment argument.
+ std::vector<const Type*> NewParams;
+ for (unsigned p = 0; p < NumP; ++p)
+ NewParams.push_back(FTy->getParamType(p));
+ NewParams.push_back(Type::getInt32Ty(F->getContext()));
+ FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(),
+ NewParams, false);
+ NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy));
+ return true;
+ }
}
break;
case 'b':
@@ -182,7 +239,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFnName = "llvm.memset.p0i8.i64";
}
if (NewFnName) {
- const FunctionType *FTy = F->getFunctionType();
NewFn = cast<Function>(M->getOrInsertFunction(NewFnName,
FTy->getReturnType(),
FTy->getParamType(0),
@@ -309,6 +365,73 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
return Upgraded;
}
+bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
+ StringRef Name(GV->getName());
+
+ // We are only upgrading one symbol here.
+ if (Name == ".llvm.eh.catch.all.value") {
+ GV->setName("llvm.eh.catch.all.value");
+ return true;
+ }
+
+ return false;
+}
+
+/// ExtendNEONArgs - For NEON "long" and "wide" operations, where the results
+/// have vector elements twice as big as one or both source operands, do the
+/// sign- or zero-extension that used to be handled by intrinsics. The
+/// extended values are returned via V0 and V1.
+static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1,
+ Value *&V0, Value *&V1) {
+ Function *F = CI->getCalledFunction();
+ const std::string& Name = F->getName();
+ bool isLong = (Name.at(18) == 'l');
+ bool isSigned = (Name.at(19) == 's');
+
+ if (isSigned) {
+ if (isLong)
+ V0 = new SExtInst(Arg0, CI->getType(), "", CI);
+ else
+ V0 = Arg0;
+ V1 = new SExtInst(Arg1, CI->getType(), "", CI);
+ } else {
+ if (isLong)
+ V0 = new ZExtInst(Arg0, CI->getType(), "", CI);
+ else
+ V0 = Arg0;
+ V1 = new ZExtInst(Arg1, CI->getType(), "", CI);
+ }
+}
+
+/// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba,
+/// or vabal intrinsics, construct a call to a vabd intrinsic. Examine the
+/// name of the old intrinsic to determine whether to use a signed or unsigned
+/// vabd intrinsic. Get the type from the old call instruction, adjusted for
+/// half-size vector elements if the old intrinsic was vabdl or vabal.
+static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
+ Function *F = CI->getCalledFunction();
+ const std::string& Name = F->getName();
+ bool isLong = (Name.at(18) == 'l');
+ bool isSigned = (Name.at(isLong ? 19 : 18) == 's');
+
+ Intrinsic::ID intID;
+ if (isSigned)
+ intID = Intrinsic::arm_neon_vabds;
+ else
+ intID = Intrinsic::arm_neon_vabdu;
+
+ const Type *Ty = CI->getType();
+ if (isLong)
+ Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty));
+
+ Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1);
+ Value *Operands[2];
+ Operands[0] = Arg0;
+ Operands[1] = Arg1;
+ return CallInst::Create(VABD, Operands, Operands+2,
+ "upgraded."+CI->getName(), CI);
+}
+
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call to
// the upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
@@ -320,6 +443,60 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
assert(F && "CallInst has no function associated with it.");
if (!NewFn) {
+ // Get the Function's name.
+ const std::string& Name = F->getName();
+
+ // Upgrade ARM NEON intrinsics.
+ if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
+ Instruction *NewI;
+ Value *V0, *V1;
+ if (Name.compare(14, 7, "vmovls.", 7) == 0) {
+ NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
+ "upgraded." + CI->getName(), CI);
+ } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
+ NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
+ "upgraded." + CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vadd", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
+ NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vsub", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
+ NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
+ } else if (Name.compare(14, 4, "vmul", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
+ NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI);
+ } else if (Name.compare(14, 4, "vmla", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
+ Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
+ NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI,
+ "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vmls", 4) == 0) {
+ ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
+ Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
+ NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI,
+ "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vabd", 4) == 0) {
+ NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1));
+ NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 4, "vaba", 4) == 0) {
+ NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2));
+ if (Name.at(18) == 'l')
+ NewI = new ZExtInst(NewI, CI->getType(), "", CI);
+ NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI,
+ "upgraded."+CI->getName(), CI);
+ } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
+ NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
+ "upgraded." + CI->getName(), CI);
+ } else {
+ llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
+ }
+ // Replace any uses of the old CallInst.
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(NewI);
+ CI->eraseFromParent();
+ return;
+ }
+
bool isLoadH = false, isLoadL = false, isMovL = false;
bool isMovSD = false, isShufPD = false;
bool isUnpckhPD = false, isUnpcklPD = false;
@@ -398,7 +575,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
} else if (isShufPD) {
Value *Op1 = CI->getArgOperand(1);
- unsigned MaskVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ unsigned MaskVal =
+ cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
((MaskVal >> 1) & 1)+2));
@@ -547,7 +725,40 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}
switch (NewFn->getIntrinsicID()) {
- default: llvm_unreachable("Unknown function for CallInst upgrade.");
+ default: llvm_unreachable("Unknown function for CallInst upgrade.");
+ case Intrinsic::arm_neon_vld1:
+ case Intrinsic::arm_neon_vld2:
+ case Intrinsic::arm_neon_vld3:
+ case Intrinsic::arm_neon_vld4:
+ case Intrinsic::arm_neon_vst1:
+ case Intrinsic::arm_neon_vst2:
+ case Intrinsic::arm_neon_vst3:
+ case Intrinsic::arm_neon_vst4:
+ case Intrinsic::arm_neon_vld2lane:
+ case Intrinsic::arm_neon_vld3lane:
+ case Intrinsic::arm_neon_vld4lane:
+ case Intrinsic::arm_neon_vst2lane:
+ case Intrinsic::arm_neon_vst3lane:
+ case Intrinsic::arm_neon_vst4lane: {
+ // Add a default alignment argument of 1.
+ SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
+ Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
+ CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
+ CI->getName(), CI);
+ NewCI->setTailCall(CI->isTailCall());
+ NewCI->setCallingConv(CI->getCallingConv());
+
+ // Handle any uses of the old CallInst.
+ if (!CI->use_empty())
+ // Replace all uses of the old call with the new call, which has the
+ // correct type.
+ CI->replaceAllUsesWith(NewCI);
+
+ // Clean up the old call now that it has been completely upgraded.
+ CI->eraseFromParent();
+ break;
+ }
+
case Intrinsic::x86_mmx_psll_d:
case Intrinsic::x86_mmx_psll_q:
case Intrinsic::x86_mmx_psll_w:
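
The position arguments in the Name.compare calls above are offsets into the full intrinsic name: 5 skips "llvm.", 14 skips "llvm.arm.neon.", and 18 or 19 lands on the signedness letter depending on whether the mnemonic is four or five characters long. A standalone check of the arithmetic against one representative old-style name (the type suffix is illustrative):

    #include <cassert>
    #include <string>

    int main() {
      const std::string Name = "llvm.arm.neon.vaddls.v4i32";
      assert(Name.compare(5, 9, "arm.neon.", 9) == 0);  // after "llvm."
      assert(Name.compare(14, 5, "vaddl", 5) == 0);     // after "llvm.arm.neon."
      assert(Name.compare(19, 2, "s.", 2) == 0);        // signed "long" variant
      return 0;
    }
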
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index c64564b8a1e78..1388c93cce39c 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_library(LLVMCore
Module.cpp
Pass.cpp
PassManager.cpp
+ PassRegistry.cpp
PrintModulePass.cpp
Type.cpp
TypeSymbolTable.cpp
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 35672661e4459..9a91dafab2ff7 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -357,22 +357,6 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
}
}
- if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
- unsigned NumElems = UTy->getNumElements();
- // Check for a union with all members having the same size.
- Constant *MemberSize =
- getFoldedSizeOf(UTy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberSize !=
- getFoldedSizeOf(UTy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame)
- return MemberSize;
- }
-
// Pointer size doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
@@ -438,24 +422,6 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
return MemberAlign;
}
- if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
- // Union alignment is the maximum alignment of any member.
- // Without target data, we can't compare much, but we can check to see
- // if all the members have the same alignment.
- unsigned NumElems = UTy->getNumElements();
- // Check for a union with all members having the same alignment.
- Constant *MemberAlign =
- getFoldedAlignOf(UTy->getElementType(0), DestTy, true);
- bool AllSame = true;
- for (unsigned i = 1; i != NumElems; ++i)
- if (MemberAlign != getFoldedAlignOf(UTy->getElementType(i), DestTy, true)) {
- AllSame = false;
- break;
- }
- if (AllSame)
- return MemberAlign;
- }
-
// Pointer alignment doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
@@ -909,8 +875,6 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
unsigned numOps;
if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
numOps = AR->getNumElements();
- else if (AggTy->isUnionTy())
- numOps = 1;
else
numOps = cast<StructType>(AggTy)->getNumElements();
@@ -927,10 +891,6 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
if (const StructType* ST = dyn_cast<StructType>(AggTy))
return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
- if (const UnionType* UT = dyn_cast<UnionType>(AggTy)) {
- assert(Ops.size() == 1 && "Union can only contain a single value!");
- return ConstantUnion::get(UT, Ops[0]);
- }
return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
}
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 00b009401dccd..16eaca81048bb 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -59,7 +59,6 @@ Constant *Constant::getNullValue(const Type *Ty) {
case Type::PointerTyID:
return ConstantPointerNull::get(cast<PointerType>(Ty));
case Type::StructTyID:
- case Type::UnionTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
return ConstantAggregateZero::get(Ty);
@@ -526,6 +525,7 @@ Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
Constant* ConstantArray::get(LLVMContext &Context, StringRef Str,
bool AddNull) {
std::vector<Constant*> ElementVals;
+ ElementVals.reserve(Str.size() + size_t(AddNull));
for (unsigned i = 0; i < Str.size(); ++i)
ElementVals.push_back(ConstantInt::get(Type::getInt8Ty(Context), Str[i]));
@@ -586,27 +586,6 @@ Constant* ConstantStruct::get(LLVMContext &Context,
return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
}
-ConstantUnion::ConstantUnion(const UnionType *T, Constant* V)
- : Constant(T, ConstantUnionVal,
- OperandTraits<ConstantUnion>::op_end(this) - 1, 1) {
- Use *OL = OperandList;
- assert(T->getElementTypeIndex(V->getType()) >= 0 &&
- "Initializer for union element isn't a member of union type!");
- *OL = V;
-}
-
-// ConstantUnion accessors.
-Constant* ConstantUnion::get(const UnionType* T, Constant* V) {
- LLVMContextImpl* pImpl = T->getContext().pImpl;
-
- // Create a ConstantAggregateZero value if all elements are zeros...
- if (!V->isNullValue())
- return pImpl->UnionConstants.getOrCreate(T, V);
-
- return ConstantAggregateZero::get(T);
-}
-
-
ConstantVector::ConstantVector(const VectorType *T,
const std::vector<Constant*> &V)
: Constant(T, ConstantVectorVal,
@@ -723,7 +702,7 @@ bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
if (getOpcode() != Instruction::GetElementPtr) return false;
gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
- User::const_op_iterator OI = next(this->op_begin());
+ User::const_op_iterator OI = llvm::next(this->op_begin());
// Skip the first index, as it has no static limit.
++GEPI;
@@ -945,8 +924,7 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
// Factory Function Implementation
ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
- assert((Ty->isStructTy() || Ty->isUnionTy()
- || Ty->isArrayTy() || Ty->isVectorTy()) &&
+ assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
"Cannot create an aggregate zero of non-aggregate type!");
LLVMContextImpl *pImpl = Ty->getContext().pImpl;
@@ -956,14 +934,14 @@ ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantAggregateZero::destroyConstant() {
- getType()->getContext().pImpl->AggZeroConstants.remove(this);
+ getRawType()->getContext().pImpl->AggZeroConstants.remove(this);
destroyConstantImpl();
}
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantArray::destroyConstant() {
- getType()->getContext().pImpl->ArrayConstants.remove(this);
+ getRawType()->getContext().pImpl->ArrayConstants.remove(this);
destroyConstantImpl();
}
@@ -1027,21 +1005,14 @@ namespace llvm {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantStruct::destroyConstant() {
- getType()->getContext().pImpl->StructConstants.remove(this);
- destroyConstantImpl();
-}
-
-// destroyConstant - Remove the constant from the constant table...
-//
-void ConstantUnion::destroyConstant() {
- getType()->getContext().pImpl->UnionConstants.remove(this);
+ getRawType()->getContext().pImpl->StructConstants.remove(this);
destroyConstantImpl();
}
// destroyConstant - Remove the constant from the constant table...
//
void ConstantVector::destroyConstant() {
- getType()->getContext().pImpl->VectorConstants.remove(this);
+ getRawType()->getContext().pImpl->VectorConstants.remove(this);
destroyConstantImpl();
}
@@ -1082,7 +1053,7 @@ ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantPointerNull::destroyConstant() {
- getType()->getContext().pImpl->NullPtrConstants.remove(this);
+ getRawType()->getContext().pImpl->NullPtrConstants.remove(this);
destroyConstantImpl();
}
@@ -1097,7 +1068,7 @@ UndefValue *UndefValue::get(const Type *Ty) {
// destroyConstant - Remove the constant from the constant table.
//
void UndefValue::destroyConstant() {
- getType()->getContext().pImpl->UndefValueConstants.remove(this);
+ getRawType()->getContext().pImpl->UndefValueConstants.remove(this);
destroyConstantImpl();
}
@@ -1131,7 +1102,7 @@ BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
// destroyConstant - Remove the constant from the constant table.
//
void BlockAddress::destroyConstant() {
- getFunction()->getType()->getContext().pImpl
+ getFunction()->getRawType()->getContext().pImpl
->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
getBasicBlock()->AdjustBlockAddressRefCount(-1);
destroyConstantImpl();
@@ -1930,7 +1901,7 @@ Constant* ConstantExpr::getAShr(Constant* C1, Constant* C2) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantExpr::destroyConstant() {
- getType()->getContext().pImpl->ExprConstants.remove(this);
+ getRawType()->getContext().pImpl->ExprConstants.remove(this);
destroyConstantImpl();
}
@@ -1971,11 +1942,10 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
Constant *ToC = cast<Constant>(To);
- LLVMContext &Context = getType()->getContext();
- LLVMContextImpl *pImpl = Context.pImpl;
+ LLVMContextImpl *pImpl = getRawType()->getContext().pImpl;
std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup;
- Lookup.first.first = getType();
+ Lookup.first.first = cast<ArrayType>(getRawType());
Lookup.second = this;
std::vector<Constant*> &Values = Lookup.first.second;
@@ -2009,7 +1979,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
Constant *Replacement = 0;
if (isAllZeros) {
- Replacement = ConstantAggregateZero::get(getType());
+ Replacement = ConstantAggregateZero::get(getRawType());
} else {
// Check to see if we have this array type already.
bool Exists;
@@ -2060,7 +2030,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup;
- Lookup.first.first = getType();
+ Lookup.first.first = cast<StructType>(getRawType());
Lookup.second = this;
std::vector<Constant*> &Values = Lookup.first.second;
Values.reserve(getNumOperands()); // Build replacement struct.
@@ -2082,14 +2052,13 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
}
Values[OperandToUpdate] = ToC;
- LLVMContext &Context = getType()->getContext();
- LLVMContextImpl *pImpl = Context.pImpl;
+ LLVMContextImpl *pImpl = getRawType()->getContext().pImpl;
Constant *Replacement = 0;
if (isAllZeros) {
- Replacement = ConstantAggregateZero::get(getType());
+ Replacement = ConstantAggregateZero::get(getRawType());
} else {
- // Check to see if we have this array type already.
+ // Check to see if we have this struct type already.
bool Exists;
LLVMContextImpl::StructConstantsTy::MapTy::iterator I =
pImpl->StructConstants.InsertOrGetItem(Lookup, Exists);
@@ -2118,56 +2087,6 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
destroyConstant();
}
-void ConstantUnion::replaceUsesOfWithOnConstant(Value *From, Value *To,
- Use *U) {
- assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
- Constant *ToC = cast<Constant>(To);
-
- assert(U == OperandList && "Union constants can only have one use!");
- assert(getNumOperands() == 1 && "Union constants can only have one use!");
- assert(getOperand(0) == From && "ReplaceAllUsesWith broken!");
-
- std::pair<LLVMContextImpl::UnionConstantsTy::MapKey, ConstantUnion*> Lookup;
- Lookup.first.first = getType();
- Lookup.second = this;
- Lookup.first.second = ToC;
-
- LLVMContext &Context = getType()->getContext();
- LLVMContextImpl *pImpl = Context.pImpl;
-
- Constant *Replacement = 0;
- if (ToC->isNullValue()) {
- Replacement = ConstantAggregateZero::get(getType());
- } else {
- // Check to see if we have this union type already.
- bool Exists;
- LLVMContextImpl::UnionConstantsTy::MapTy::iterator I =
- pImpl->UnionConstants.InsertOrGetItem(Lookup, Exists);
-
- if (Exists) {
- Replacement = I->second;
- } else {
- // Okay, the new shape doesn't exist in the system yet. Instead of
- // creating a new constant union, inserting it, replaceallusesof'ing the
- // old with the new, then deleting the old... just update the current one
- // in place!
- pImpl->UnionConstants.MoveConstantToNewSlot(this, I);
-
- // Update to the new value.
- setOperand(0, ToC);
- return;
- }
- }
-
- assert(Replacement != this && "I didn't contain From!");
-
- // Everyone using this now uses the replacement.
- uncheckedReplaceAllUsesWith(Replacement);
-
- // Delete the old constant!
- destroyConstant();
-}
-
void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
@@ -2180,7 +2099,7 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Values.push_back(Val);
}
- Constant *Replacement = get(getType(), Values);
+ Constant *Replacement = get(cast<VectorType>(getRawType()), Values);
assert(Replacement != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
@@ -2227,7 +2146,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
&Indices[0], Indices.size());
} else if (isCast()) {
assert(getOperand(0) == From && "Cast only has one use!");
- Replacement = ConstantExpr::getCast(getOpcode(), To, getType());
+ Replacement = ConstantExpr::getCast(getOpcode(), To, getRawType());
} else if (getOpcode() == Instruction::Select) {
Constant *C1 = getOperand(0);
Constant *C2 = getOperand(1);
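
Every destroyConstant body above switches from getType() to getRawType() for the reason spelled out in the ConstantsContext.h hunk below: the uniquing tables key each constant by the exact type pointer it was created with, and while that type is being refined, getType() can resolve to a different pointer, so an erase keyed on the refined type would miss the entry. A simplified sketch of the hazard (toy table, hypothetical types):

    #include <cassert>
    #include <map>

    struct Type {};
    struct Constant { Type *RawTy; };

    // Toy uniquing table keyed by the creation-time type pointer.
    typedef std::map<Type*, Constant*> UniqueTable;

    static void destroy(UniqueTable &Table, Constant *C) {
      // Erase with the creation-time pointer (getRawType); a refined
      // Type* would not match the stored key and the entry would dangle.
      UniqueTable::iterator I = Table.find(C->RawTy);
      assert(I != Table.end() && "Constant not in its uniquing table!");
      Table.erase(I);
    }
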
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 2f2fac53f062e..1c04c3e1987e2 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -511,14 +511,6 @@ struct ConstantKeyData<ConstantStruct> {
}
};
-template<>
-struct ConstantKeyData<ConstantUnion> {
- typedef Constant* ValType;
- static ValType getValType(ConstantUnion *CU) {
- return cast<Constant>(CU->getOperand(0));
- }
-};
-
// ConstantPointerNull does not take extra "value" argument...
template<class ValType>
struct ConstantCreator<ConstantPointerNull, PointerType, ValType> {
@@ -757,9 +749,13 @@ public:
// If this constant is the representative element for its abstract type,
// update the AbstractTypeMap so that the representative element is I.
- if (C->getType()->isAbstract()) {
+ //
+ // This must use getRawType() because if the type is under refinement, we
+ // will get the refineAbstractType callback below, and we don't want to
+ // kick union find in on the constant.
+ if (C->getRawType()->isAbstract()) {
typename AbstractTypeMapTy::iterator ATI =
- AbstractTypeMap.find(C->getType());
+ AbstractTypeMap.find(cast<DerivedType>(C->getRawType()));
assert(ATI != AbstractTypeMap.end() &&
"Abstract type not in AbstractTypeMap?");
if (ATI->second == OldI)
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index ca1a399fe8aae..5aad19dd2a4ad 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -22,6 +22,7 @@
#include "llvm/TypeSymbolTable.h"
#include "llvm/InlineAsm.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/PassManager.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -155,8 +156,6 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
return LLVMFunctionTypeKind;
case Type::StructTyID:
return LLVMStructTypeKind;
- case Type::UnionTyID:
- return LLVMUnionTypeKind;
case Type::ArrayTyID:
return LLVMArrayTypeKind;
case Type::PointerTyID:
@@ -315,34 +314,6 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->isPacked();
}
-/*--.. Operations on union types ..........................................--*/
-
-LLVMTypeRef LLVMUnionTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
- unsigned ElementCount) {
- SmallVector<const Type*, 8> Tys;
- for (LLVMTypeRef *I = ElementTypes,
- *E = ElementTypes + ElementCount; I != E; ++I)
- Tys.push_back(unwrap(*I));
-
- return wrap(UnionType::get(&Tys[0], Tys.size()));
-}
-
-LLVMTypeRef LLVMUnionType(LLVMTypeRef *ElementTypes, unsigned ElementCount) {
- return LLVMUnionTypeInContext(LLVMGetGlobalContext(), ElementTypes,
- ElementCount);
-}
-
-unsigned LLVMCountUnionElementTypes(LLVMTypeRef UnionTy) {
- return unwrap<UnionType>(UnionTy)->getNumElements();
-}
-
-void LLVMGetUnionElementTypes(LLVMTypeRef UnionTy, LLVMTypeRef *Dest) {
- UnionType *Ty = unwrap<UnionType>(UnionTy);
- for (FunctionType::param_iterator I = Ty->element_begin(),
- E = Ty->element_end(); I != E; ++I)
- *Dest++ = wrap(*I);
-}
-
/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
@@ -488,6 +459,14 @@ LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index) {
return wrap(unwrap<User>(Val)->getOperand(Index));
}
+void LLVMSetOperand(LLVMValueRef Val, unsigned Index, LLVMValueRef Op) {
+ unwrap<User>(Val)->setOperand(Index, unwrap(Op));
+}
+
+int LLVMGetNumOperands(LLVMValueRef Val) {
+ return unwrap<User>(Val)->getNumOperands();
+}
+
/*--.. Operations on constants of any type .................................--*/
LLVMValueRef LLVMConstNull(LLVMTypeRef Ty) {
@@ -619,10 +598,6 @@ LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
return wrap(ConstantVector::get(
unwrap<Constant>(ScalarConstantVals, Size), Size));
}
-LLVMValueRef LLVMConstUnion(LLVMTypeRef Ty, LLVMValueRef Val) {
- return wrap(ConstantUnion::get(unwrap<UnionType>(Ty), unwrap<Constant>(Val)));
-}
-
/*--.. Constant expressions ................................................--*/
LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal) {
@@ -1060,6 +1035,8 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
return LLVMLinkerPrivateLinkage;
case GlobalValue::LinkerPrivateWeakLinkage:
return LLVMLinkerPrivateWeakLinkage;
+ case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+ return LLVMLinkerPrivateWeakDefAutoLinkage;
case GlobalValue::DLLImportLinkage:
return LLVMDLLImportLinkage;
case GlobalValue::DLLExportLinkage:
@@ -1113,6 +1090,9 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
case LLVMLinkerPrivateWeakLinkage:
GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
break;
+ case LLVMLinkerPrivateWeakDefAutoLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakDefAutoLinkage);
+ break;
case LLVMDLLImportLinkage:
GV->setLinkage(GlobalValue::DLLImportLinkage);
break;
@@ -1515,6 +1495,14 @@ void LLVMDeleteBasicBlock(LLVMBasicBlockRef BBRef) {
unwrap(BBRef)->eraseFromParent();
}
+void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveBefore(unwrap(MovePos));
+}
+
+void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos) {
+ unwrap(BB)->moveAfter(unwrap(MovePos));
+}
+
/*--.. Operations on instructions ..........................................--*/
LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst) {
@@ -2223,3 +2211,39 @@ LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
delete unwrap(MemBuf);
}
+
+
+/*===-- Pass Manager ------------------------------------------------------===*/
+
+LLVMPassManagerRef LLVMCreatePassManager() {
+ return wrap(new PassManager());
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
+ return wrap(new FunctionPassManager(unwrap(M)));
+}
+
+LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
+ return LLVMCreateFunctionPassManagerForModule(
+ reinterpret_cast<LLVMModuleRef>(P));
+}
+
+LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
+ return unwrap<PassManager>(PM)->run(*unwrap(M));
+}
+
+LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doInitialization();
+}
+
+LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
+ return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
+}
+
+LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
+ return unwrap<FunctionPassManager>(FPM)->doFinalization();
+}
+
+void LLVMDisposePassManager(LLVMPassManagerRef PM) {
+ delete unwrap(PM);
+}
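
The new bindings above give the C API a complete PassManager lifecycle. A sketch of the expected call sequence over every function in a module (no passes are added here; pass-creation entry points live in headers such as llvm-c/Transforms/Scalar.h):

    #include "llvm-c/Core.h"

    static void runFunctionPasses(LLVMModuleRef M) {
      LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(M);
      LLVMInitializeFunctionPassManager(FPM);
      for (LLVMValueRef F = LLVMGetFirstFunction(M); F;
           F = LLVMGetNextFunction(F))
        LLVMRunFunctionPassManager(FPM, F);
      LLVMFinalizeFunctionPassManager(FPM);
      LLVMDisposePassManager(FPM);
    }
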
diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp
index 10a866fab6226..f3dad824461dd 100644
--- a/lib/VMCore/Dominators.cpp
+++ b/lib/VMCore/Dominators.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -51,8 +52,8 @@ TEMPLATE_INSTANTIATION(class llvm::DomTreeNodeBase<BasicBlock>);
TEMPLATE_INSTANTIATION(class llvm::DominatorTreeBase<BasicBlock>);
char DominatorTree::ID = 0;
-static RegisterPass<DominatorTree>
-E("domtree", "Dominator Tree Construction", true, true);
+INITIALIZE_PASS(DominatorTree, "domtree",
+ "Dominator Tree Construction", true, true);
bool DominatorTree::runOnFunction(Function &F) {
DT->recalculate(F);
@@ -105,8 +106,8 @@ bool DominatorTree::dominates(const Instruction *A, const Instruction *B) const{
//===----------------------------------------------------------------------===//
char DominanceFrontier::ID = 0;
-static RegisterPass<DominanceFrontier>
-G("domfrontier", "Dominance Frontier Construction", true, true);
+INITIALIZE_PASS(DominanceFrontier, "domfrontier",
+ "Dominance Frontier Construction", true, true);
void DominanceFrontier::verifyAnalysis() const {
if (!VerifyDomInfo) return;
@@ -122,36 +123,23 @@ void DominanceFrontier::verifyAnalysis() const {
// NewBB is split and now it has one successor. Update dominance frontier to
// reflect this change.
void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
- assert(NewBB->getTerminator()->getNumSuccessors() == 1
- && "NewBB should have a single successor!");
+ assert(NewBB->getTerminator()->getNumSuccessors() == 1 &&
+ "NewBB should have a single successor!");
BasicBlock *NewBBSucc = NewBB->getTerminator()->getSuccessor(0);
- SmallVector<BasicBlock*, 8> PredBlocks;
- for (pred_iterator PI = pred_begin(NewBB), PE = pred_end(NewBB);
- PI != PE; ++PI)
- PredBlocks.push_back(*PI);
-
- if (PredBlocks.empty())
- // If NewBB does not have any predecessors then it is a entry block.
- // In this case, NewBB and its successor NewBBSucc dominates all
- // other blocks.
- return;
-
// NewBBSucc inherits original NewBB frontier.
DominanceFrontier::iterator NewBBI = find(NewBB);
- if (NewBBI != end()) {
- DominanceFrontier::DomSetType NewBBSet = NewBBI->second;
- DominanceFrontier::DomSetType NewBBSuccSet;
- NewBBSuccSet.insert(NewBBSet.begin(), NewBBSet.end());
- addBasicBlock(NewBBSucc, NewBBSuccSet);
- }
+ if (NewBBI != end())
+ addBasicBlock(NewBBSucc, NewBBI->second);
// If NewBB dominates NewBBSucc, then DF(NewBB) is now going to be the
- // DF(PredBlocks[0]) without the stuff that the new block does not dominate
+ // DF(NewBBSucc) without the stuff that the new block does not dominate
// a predecessor of.
DominatorTree &DT = getAnalysis<DominatorTree>();
- if (DT.dominates(NewBB, NewBBSucc)) {
- DominanceFrontier::iterator DFI = find(PredBlocks[0]);
+ DomTreeNode *NewBBNode = DT.getNode(NewBB);
+ DomTreeNode *NewBBSuccNode = DT.getNode(NewBBSucc);
+ if (DT.dominates(NewBBNode, NewBBSuccNode)) {
+ DominanceFrontier::iterator DFI = find(NewBBSucc);
if (DFI != end()) {
DominanceFrontier::DomSetType Set = DFI->second;
// Filter out stuff in Set that we do not dominate a predecessor of.
@@ -160,8 +148,10 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
bool DominatesPred = false;
for (pred_iterator PI = pred_begin(*SetI), E = pred_end(*SetI);
PI != E; ++PI)
- if (DT.dominates(NewBB, *PI))
+ if (DT.dominates(NewBBNode, DT.getNode(*PI))) {
DominatesPred = true;
+ break;
+ }
if (!DominatesPred)
Set.erase(SetI++);
else
@@ -186,50 +176,71 @@ void DominanceFrontier::splitBlock(BasicBlock *NewBB) {
NewDFSet.insert(NewBBSucc);
addBasicBlock(NewBB, NewDFSet);
}
-
- // Now we must loop over all of the dominance frontiers in the function,
- // replacing occurrences of NewBBSucc with NewBB in some cases. All
- // blocks that dominate a block in PredBlocks and contained NewBBSucc in
- // their dominance frontier must be updated to contain NewBB instead.
- //
- for (Function::iterator FI = NewBB->getParent()->begin(),
- FE = NewBB->getParent()->end(); FI != FE; ++FI) {
- DominanceFrontier::iterator DFI = find(FI);
- if (DFI == end()) continue; // unreachable block.
-
- // Only consider nodes that have NewBBSucc in their dominator frontier.
- if (!DFI->second.count(NewBBSucc)) continue;
-
- // Verify whether this block dominates a block in predblocks. If not, do
- // not update it.
- bool BlockDominatesAny = false;
- for (SmallVectorImpl<BasicBlock*>::const_iterator BI = PredBlocks.begin(),
- BE = PredBlocks.end(); BI != BE; ++BI) {
- if (DT.dominates(FI, *BI)) {
- BlockDominatesAny = true;
+
+ // Now update dominance frontiers which either used to contain NewBBSucc
+ // or which now need to include NewBB.
+
+ // Collect the set of blocks which dominate a predecessor of NewBB or
+ // NewBBSucc and which don't dominate both. This is an initial
+ // approximation of the blocks whose dominance frontiers will need updates.
+ SmallVector<DomTreeNode *, 16> AllPredDoms;
+
+ // Compute the block which dominates both NewBBSucc and NewBB. This is
+ // the immediate dominator of NewBBSucc unless NewBB dominates NewBBSucc.
+ // The code below which climbs dominator trees will stop at this point,
+ // because from this point up, dominance frontiers are unaffected.
+ DomTreeNode *DominatesBoth = 0;
+ if (NewBBSuccNode) {
+ DominatesBoth = NewBBSuccNode->getIDom();
+ if (DominatesBoth == NewBBNode)
+ DominatesBoth = NewBBNode->getIDom();
+ }
+
+ // Collect the set of all blocks which dominate a predecessor of NewBB.
+ SmallPtrSet<DomTreeNode *, 8> NewBBPredDoms;
+ for (pred_iterator PI = pred_begin(NewBB), E = pred_end(NewBB); PI != E; ++PI)
+ for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) {
+ if (DTN == DominatesBoth)
break;
- }
+ if (!NewBBPredDoms.insert(DTN))
+ break;
+ AllPredDoms.push_back(DTN);
}
- // If NewBBSucc should not stay in our dominator frontier, remove it.
- // We remove it unless there is a predecessor of NewBBSucc that we
- // dominate, but we don't strictly dominate NewBBSucc.
- bool ShouldRemove = true;
- if ((BasicBlock*)FI == NewBBSucc || !DT.dominates(FI, NewBBSucc)) {
- // Okay, we know that PredDom does not strictly dominate NewBBSucc.
- // Check to see if it dominates any predecessors of NewBBSucc.
- for (pred_iterator PI = pred_begin(NewBBSucc),
- E = pred_end(NewBBSucc); PI != E; ++PI)
- if (DT.dominates(FI, *PI)) {
- ShouldRemove = false;
- break;
- }
+ // Collect the set of all blocks which dominate a predecessor of NewBBSucc.
+ SmallPtrSet<DomTreeNode *, 8> NewBBSuccPredDoms;
+ for (pred_iterator PI = pred_begin(NewBBSucc),
+ E = pred_end(NewBBSucc); PI != E; ++PI)
+ for (DomTreeNode *DTN = DT.getNode(*PI); DTN; DTN = DTN->getIDom()) {
+ if (DTN == DominatesBoth)
+ break;
+ if (!NewBBSuccPredDoms.insert(DTN))
+ break;
+ if (!NewBBPredDoms.count(DTN))
+ AllPredDoms.push_back(DTN);
}
-
- if (ShouldRemove)
- removeFromFrontier(DFI, NewBBSucc);
- if (BlockDominatesAny && (&*FI == NewBB || !DT.dominates(FI, NewBB)))
+
+ // Visit all relevant dominance frontiers and make any needed updates.
+ for (SmallVectorImpl<DomTreeNode *>::const_iterator I = AllPredDoms.begin(),
+ E = AllPredDoms.end(); I != E; ++I) {
+ DomTreeNode *DTN = *I;
+ iterator DFI = find((*I)->getBlock());
+
+ // Only consider nodes that have NewBBSucc in their dominator frontier.
+ if (DFI == end() || !DFI->second.count(NewBBSucc)) continue;
+
+ // If the block dominates a predecessor of NewBB but does not properly
+ // dominate NewBB itself, add NewBB to its dominance frontier.
+ if (NewBBPredDoms.count(DTN) &&
+ !DT.properlyDominates(DTN, NewBBNode))
addToFrontier(DFI, NewBB);
+
+ // If the block does not dominate a predecessor of NewBBSucc or
+ // properly dominates NewBBSucc itself, remove NewBBSucc from its
+ // dominance frontier.
+ if (!NewBBSuccPredDoms.count(DTN) ||
+ DT.properlyDominates(DTN, NewBBSuccNode))
+ removeFromFrontier(DFI, NewBBSucc);
}
}
@@ -343,3 +354,7 @@ void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
}
}
+void DominanceFrontierBase::dump() const {
+ print(dbgs());
+}
+
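Note on the splitBlock rewrite above: it replaces a scan over every block in the function with walks up the immediate-dominator chains of the predecessor blocks, cut off at the first block that dominates both NewBB and NewBBSucc. A minimal, self-contained sketch of that walk (the simplified Node type is a stand-in for llvm::DomTreeNode; this is not the LLVM implementation):

#include <set>
#include <vector>

// Hypothetical, simplified stand-in for llvm::DomTreeNode: each node knows
// only its immediate dominator.
struct Node {
  Node *IDom;
};

// Collect every block that dominates a block in Starts, walking up the
// immediate-dominator chain and stopping at StopAt (the block that dominates
// both NewBB and NewBBSucc). Seen both deduplicates and cuts a walk short
// once it reaches an already-visited chain -- the same early-out the patch
// gets from SmallPtrSet::insert returning false.
std::vector<Node *> collectDoms(const std::vector<Node *> &Starts,
                                Node *StopAt) {
  std::set<Node *> Seen;
  std::vector<Node *> Result;
  for (Node *N : Starts)
    for (Node *D = N; D && D != StopAt; D = D->IDom) {
      if (!Seen.insert(D).second)
        break; // The rest of this chain was already collected.
      Result.push_back(D);
    }
  return Result;
}

Stopping at DominatesBoth is what keeps the update local: above that point, no dominance frontier can be affected by the split.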
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index b758eb8702aea..96716eeb349b9 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -102,7 +102,14 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
setVisibility(Src->getVisibility());
}
-
+void GlobalValue::setAlignment(unsigned Align) {
+ assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ Alignment = Log2_32(Align) + 1;
+ assert(getAlignment() == Align && "Alignment representation error!");
+}
+
//===----------------------------------------------------------------------===//
// GlobalVariable Implementation
//===----------------------------------------------------------------------===//
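The new GlobalValue::setAlignment mirrors the AllocaInst/LoadInst/StoreInst setters later in this diff: a power-of-two alignment is stored as log2(Align) + 1 in a small field, and the round-trip assert catches values that don't survive the encoding. A hedged sketch of the idea (Log2 is a stand-in for llvm::Log2_32, and the explicit zero case is a simplification of the real bitfield handling):

#include <cassert>

// Stand-in for llvm::Log2_32: floor of log base 2.
static unsigned Log2(unsigned V) {
  unsigned L = 0;
  while (V >>= 1)
    ++L;
  return L;
}

static unsigned encodeAlign(unsigned Align) {
  assert((Align & (Align - 1)) == 0 && "Alignment is not a power of 2!");
  return Align ? Log2(Align) + 1 : 0; // 0 encodes "no alignment"
}

static unsigned decodeAlign(unsigned Encoded) {
  return Encoded ? 1u << (Encoded - 1) : 0;
}
// decodeAlign(encodeAlign(16)) == 16, and the encoded value fits in far
// fewer bits than the alignment itself.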
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 0d2eca9c3dea6..69f713b2c42c2 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -164,7 +164,7 @@ InlineAsm::ParseConstraints(StringRef Constraints) {
StringRef::iterator ConstraintEnd = std::find(I, E, ',');
if (ConstraintEnd == I || // Empty constraint like ",,"
- Info.Parse(std::string(I, ConstraintEnd), Result)) {
+ Info.Parse(StringRef(I, ConstraintEnd-I), Result)) {
Result.clear(); // Erroneous constraint?
break;
}
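The InlineAsm change parses each constraint as a (pointer, length) slice of the original buffer rather than copying it into a std::string. A rough modern analogue, using std::string_view where the patch uses llvm::StringRef (unlike the real parser, this sketch does not reject empty constraints like ",,"):

#include <string_view>
#include <vector>

// Split a constraint string on commas into zero-copy views of the original
// buffer.
static std::vector<std::string_view> splitConstraints(std::string_view S) {
  std::vector<std::string_view> Parts;
  while (!S.empty()) {
    size_t Comma = S.find(',');
    Parts.push_back(S.substr(0, Comma)); // substr clamps npos to the tail
    if (Comma == std::string_view::npos)
      break;
    S.remove_prefix(Comma + 1);
  }
  return Parts;
}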
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 9792adaaa122d..05bed4c64316f 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -49,8 +49,8 @@ Instruction::Instruction(const Type *ty, unsigned it, Use *Ops, unsigned NumOps,
// Out of line virtual method, so the vtable, etc has a home.
Instruction::~Instruction() {
assert(Parent == 0 && "Instruction still linked in the program!");
- if (hasMetadata())
- removeAllMetadata();
+ if (hasMetadataHashEntry())
+ clearMetadataHashEntries();
}
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index c13696f229022..401802ed13d5e 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -33,10 +33,8 @@ using namespace llvm;
User::op_iterator CallSite::getCallee() const {
Instruction *II(getInstruction());
return isCall()
- ? (CallInst::ArgOffset
- ? cast</*FIXME: CallInst*/User>(II)->op_begin()
- : cast</*FIXME: CallInst*/User>(II)->op_end() - 1)
- : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function
+ ? cast<CallInst>(II)->op_end() - 1 // Skip Callee
+ : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Callee
}
//===----------------------------------------------------------------------===//
@@ -233,7 +231,7 @@ CallInst::~CallInst() {
void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
assert(NumOperands == NumParams+1 && "NumOperands not set up?");
- Op<ArgOffset -1>() = Func;
+ Op<-1>() = Func;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -246,15 +244,15 @@ void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Params[i]->getType()) &&
"Calling a function with a bad signature!");
- OperandList[i + ArgOffset] = Params[i];
+ OperandList[i] = Params[i];
}
}
void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
assert(NumOperands == 3 && "NumOperands not set up?");
- Op<ArgOffset -1>() = Func;
- Op<ArgOffset + 0>() = Actual1;
- Op<ArgOffset + 1>() = Actual2;
+ Op<-1>() = Func;
+ Op<0>() = Actual1;
+ Op<1>() = Actual2;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -273,8 +271,8 @@ void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
void CallInst::init(Value *Func, Value *Actual) {
assert(NumOperands == 2 && "NumOperands not set up?");
- Op<ArgOffset -1>() = Func;
- Op<ArgOffset + 0>() = Actual;
+ Op<-1>() = Func;
+ Op<0>() = Actual;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -290,7 +288,7 @@ void CallInst::init(Value *Func, Value *Actual) {
void CallInst::init(Value *Func) {
assert(NumOperands == 1 && "NumOperands not set up?");
- Op<ArgOffset -1>() = Func;
+ Op<-1>() = Func;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -893,6 +891,8 @@ AllocaInst::~AllocaInst() {
void AllocaInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
setInstructionSubclassData(Log2_32(Align) + 1);
assert(getAlignment() == Align && "Alignment representation error!");
}
@@ -1028,8 +1028,11 @@ LoadInst::LoadInst(Value *Ptr, const char *Name, bool isVolatile,
void LoadInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
((Log2_32(Align)+1)<<1));
+ assert(getAlignment() == Align && "Alignment representation error!");
}
//===----------------------------------------------------------------------===//
@@ -1124,8 +1127,11 @@ StoreInst::StoreInst(Value *val, Value *addr, bool isVolatile,
void StoreInst::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
setInstructionSubclassData((getSubclassDataFromInstruction() & 1) |
((Log2_32(Align)+1) << 1));
+ assert(getAlignment() == Align && "Alignment representation error!");
}
//===----------------------------------------------------------------------===//
@@ -1424,9 +1430,24 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
return false;
const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
- if (!isa<Constant>(Mask) || MaskTy == 0 ||
- !MaskTy->getElementType()->isIntegerTy(32))
+ if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
+ return false;
+
+ // Check to see if Mask is valid.
+ if (const ConstantVector *MV = dyn_cast<ConstantVector>(Mask)) {
+ const VectorType *VTy = cast<VectorType>(V1->getType());
+ for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
+ if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
+ if (CI->uge(VTy->getNumElements()*2))
+ return false;
+ } else if (!isa<UndefValue>(MV->getOperand(i))) {
+ return false;
+ }
+ }
+ }
+ else if (!isa<UndefValue>(Mask) && !isa<ConstantAggregateZero>(Mask))
return false;
+
return true;
}
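The relaxed ShuffleVectorInst check above accepts any mask whose elements are i32 constants below twice the source vector's element count, with undef allowed per lane. A simplified sketch of the per-lane check, modelling an undef lane as -1 (plain C++ types stand in for ConstantVector/ConstantInt/UndefValue):

#include <cstdint>
#include <vector>

// Each non-undef index must select an element from one of the two input
// vectors, i.e. be below NumElts * 2.
static bool isValidShuffleMask(const std::vector<int64_t> &Mask,
                               uint64_t NumElts) {
  for (int64_t Idx : Mask) {
    if (Idx == -1)
      continue; // undef lane is always fine
    if (Idx < 0 || static_cast<uint64_t>(Idx) >= NumElts * 2)
      return false;
  }
  return true;
}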
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index 4d61363b9394d..563c651315a33 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -110,21 +110,18 @@ static bool isValidName(StringRef MDName) {
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
unsigned LLVMContext::getMDKindID(StringRef Name) const {
assert(isValidName(Name) && "Invalid MDNode name");
-
- unsigned &Entry = pImpl->CustomMDKindNames[Name];
-
+
// If this is new, assign it its ID.
- if (Entry == 0) Entry = pImpl->CustomMDKindNames.size();
- return Entry;
+ return
+ pImpl->CustomMDKindNames.GetOrCreateValue(
+ Name, pImpl->CustomMDKindNames.size()).second;
}
/// getMDKindNames - Populate the client-supplied SmallVector with custom
/// metadata names and IDs.
void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
- Names.resize(pImpl->CustomMDKindNames.size()+1);
- Names[0] = "";
+ Names.resize(pImpl->CustomMDKindNames.size());
for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
E = pImpl->CustomMDKindNames.end(); I != E; ++I)
- // MD Handlers are numbered from 1.
Names[I->second] = I->first();
}
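getMDKindID now leans on the map itself for ID assignment: the first lookup of a name creates the entry with an ID equal to the map's size at that moment, so kind IDs are dense and zero-based (the old scheme numbered them from 1, hence the dropped empty slot in getMDKindNames). A small sketch of the idiom, with std::map standing in for llvm::StringMap and its GetOrCreateValue:

#include <map>
#include <string>

// First lookup of a name creates its entry with an ID equal to the current
// map size, so IDs are dense and start at 0.
static unsigned getOrAssignID(std::map<std::string, unsigned> &Names,
                              const std::string &Name) {
  std::map<std::string, unsigned>::iterator It = Names.find(Name);
  if (It != Names.end())
    return It->second;
  unsigned ID = static_cast<unsigned>(Names.size()); // size before insertion
  Names[Name] = ID;
  return ID;
}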
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index 9e41a08156084..93a075f0fccbb 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -57,14 +57,11 @@ LLVMContextImpl::~LLVMContextImpl() {
DropReferences());
std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
DropReferences());
- std::for_each(UnionConstants.map_begin(), UnionConstants.map_end(),
- DropReferences());
std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
DropReferences());
ExprConstants.freeConstants();
ArrayConstants.freeConstants();
StructConstants.freeConstants();
- UnionConstants.freeConstants();
VectorConstants.freeConstants();
AggZeroConstants.freeConstants();
NullPtrConstants.freeConstants();
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 4876f5d5075a8..51b2992898c02 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -144,10 +144,6 @@ public:
ConstantStruct, true /*largekey*/> StructConstantsTy;
StructConstantsTy StructConstants;
- typedef ConstantUniqueMap<Constant*, UnionType, ConstantUnion>
- UnionConstantsTy;
- UnionConstantsTy UnionConstants;
-
typedef ConstantUniqueMap<std::vector<Constant*>, VectorType,
ConstantVector> VectorConstantsTy;
VectorConstantsTy VectorConstants;
@@ -192,7 +188,6 @@ public:
TypeMap<PointerValType, PointerType> PointerTypes;
TypeMap<FunctionValType, FunctionType> FunctionTypes;
TypeMap<StructValType, StructType> StructTypes;
- TypeMap<UnionValType, UnionType> UnionTypes;
TypeMap<IntegerValType, IntegerType> IntegerTypes;
// Opaque types are not structurally uniqued, so don't use TypeMap.
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 3100d4ac7c9c3..da69c43ff7359 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/SmallString.h"
#include "SymbolTableListTraitsImpl.h"
+#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;
@@ -186,6 +187,21 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
unsigned NumVals, FunctionLocalness FL,
bool Insert) {
LLVMContextImpl *pImpl = Context.pImpl;
+
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != NumVals; ++i)
+ ID.AddPointer(Vals[i]);
+
+ void *InsertPoint;
+ MDNode *N = NULL;
+
+ if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
+ return N;
+
bool isFunctionLocal = false;
switch (FL) {
case FL_Unknown:
@@ -206,20 +222,6 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
break;
}
- FoldingSetNodeID ID;
- for (unsigned i = 0; i != NumVals; ++i)
- ID.AddPointer(Vals[i]);
- ID.AddBoolean(isFunctionLocal);
-
- void *InsertPoint;
- MDNode *N = NULL;
-
- if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
- return N;
-
- if (!Insert)
- return NULL;
-
// Co-allocate space for the node and Operands together, then placement new.
void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand));
N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal);
@@ -244,15 +246,42 @@ MDNode *MDNode::getIfExists(LLVMContext &Context, Value *const *Vals,
return getMDNode(Context, Vals, NumVals, FL_Unknown, false);
}
+MDNode *MDNode::getTemporary(LLVMContext &Context, Value *const *Vals,
+ unsigned NumVals) {
+ MDNode *N = (MDNode *)malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand));
+ N = new (N) MDNode(Context, Vals, NumVals, FL_No);
+ N->setValueSubclassData(N->getSubclassDataFromValue() |
+ NotUniquedBit);
+ LeakDetector::addGarbageObject(N);
+ return N;
+}
+
+void MDNode::deleteTemporary(MDNode *N) {
+ assert(N->use_empty() && "Temporary MDNode has uses!");
+ assert(!N->getContext().pImpl->MDNodeSet.RemoveNode(N) &&
+ "Deleting a non-temporary uniqued node!");
+ assert(!N->getContext().pImpl->NonUniquedMDNodes.erase(N) &&
+ "Deleting a non-temporary non-uniqued node!");
+ assert((N->getSubclassDataFromValue() & NotUniquedBit) &&
+ "Temporary MDNode does not have NotUniquedBit set!");
+ assert((N->getSubclassDataFromValue() & DestroyFlag) == 0 &&
+ "Temporary MDNode has DestroyFlag set!");
+ LeakDetector::removeGarbageObject(N);
+ N->destroy();
+}
+
/// getOperand - Return specified operand.
Value *MDNode::getOperand(unsigned i) const {
return *getOperandPtr(const_cast<MDNode*>(this), i);
}
void MDNode::Profile(FoldingSetNodeID &ID) const {
+ // Add all the operand pointers. Note that we don't have to add the
+ // isFunctionLocal bit because that's implied by the operands.
+ // Note that if the operands are later nulled out, the node will be
+ // removed from the uniquing map.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
ID.AddPointer(getOperand(i));
- ID.AddBoolean(isFunctionLocal());
}
void MDNode::setIsNotUniqued() {
@@ -301,7 +330,8 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
// If we are dropping an argument to null, we choose to not unique the MDNode
// anymore. This commonly occurs during destruction, and uniquing these
- // brings little reuse.
+ // brings little reuse. Also, this means we don't need to include
+ // isFunctionLocal bits in FoldingSetNodeIDs for MDNodes.
if (To == 0) {
setIsNotUniqued();
return;
@@ -324,59 +354,35 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
// InsertPoint will have been set by the FindNodeOrInsertPos call.
pImpl->MDNodeSet.InsertNode(this, InsertPoint);
+
+ // If this MDValue was previously function-local but no longer is, clear
+ // its function-local flag.
+ if (isFunctionLocal() && !isFunctionLocalValue(To)) {
+ bool isStillFunctionLocal = false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ Value *V = getOperand(i);
+ if (!V) continue;
+ if (isFunctionLocalValue(V)) {
+ isStillFunctionLocal = true;
+ break;
+ }
+ }
+ if (!isStillFunctionLocal)
+ setValueSubclassData(getSubclassDataFromValue() & ~FunctionLocalBit);
+ }
}
//===----------------------------------------------------------------------===//
// NamedMDNode implementation.
//
-namespace llvm {
-// SymbolTableListTraits specialization for MDSymbolTable.
-void ilist_traits<NamedMDNode>
-::addNodeToList(NamedMDNode *N) {
- assert(N->getParent() == 0 && "Value already in a container!!");
- Module *Owner = getListOwner();
- N->setParent(Owner);
- MDSymbolTable &ST = Owner->getMDSymbolTable();
- ST.insert(N->getName(), N);
-}
-
-void ilist_traits<NamedMDNode>::removeNodeFromList(NamedMDNode *N) {
- N->setParent(0);
- Module *Owner = getListOwner();
- MDSymbolTable &ST = Owner->getMDSymbolTable();
- ST.remove(N->getName());
-}
-}
-
-static SmallVector<WeakVH, 4> &getNMDOps(void *Operands) {
- return *(SmallVector<WeakVH, 4>*)Operands;
-}
-
-NamedMDNode::NamedMDNode(LLVMContext &C, const Twine &N,
- MDNode *const *MDs,
- unsigned NumMDs, Module *ParentModule)
- : Value(Type::getMetadataTy(C), Value::NamedMDNodeVal), Parent(0) {
- setName(N);
- Operands = new SmallVector<WeakVH, 4>();
-
- SmallVector<WeakVH, 4> &Node = getNMDOps(Operands);
- for (unsigned i = 0; i != NumMDs; ++i)
- Node.push_back(WeakVH(MDs[i]));
-
- if (ParentModule)
- ParentModule->getNamedMDList().push_back(this);
+static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) {
+ return *(SmallVector<TrackingVH<MDNode>, 4>*)Operands;
}
-NamedMDNode *NamedMDNode::Create(const NamedMDNode *NMD, Module *M) {
- assert(NMD && "Invalid source NamedMDNode!");
- SmallVector<MDNode *, 4> Elems;
- Elems.reserve(NMD->getNumOperands());
-
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
- Elems.push_back(NMD->getOperand(i));
- return new NamedMDNode(NMD->getContext(), NMD->getName().data(),
- Elems.data(), Elems.size(), M);
+NamedMDNode::NamedMDNode(const Twine &N)
+ : Name(N.str()), Parent(0),
+ Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {
}
NamedMDNode::~NamedMDNode() {
@@ -392,18 +398,20 @@ unsigned NamedMDNode::getNumOperands() const {
/// getOperand - Return specified operand.
MDNode *NamedMDNode::getOperand(unsigned i) const {
assert(i < getNumOperands() && "Invalid Operand number!");
- return dyn_cast_or_null<MDNode>(getNMDOps(Operands)[i]);
+ return dyn_cast<MDNode>(&*getNMDOps(Operands)[i]);
}
/// addOperand - Add metadata Operand.
void NamedMDNode::addOperand(MDNode *M) {
- getNMDOps(Operands).push_back(WeakVH(M));
+ assert(!M->isFunctionLocal() &&
+ "NamedMDNode operands must not be function-local!");
+ getNMDOps(Operands).push_back(TrackingVH<MDNode>(M));
}
/// eraseFromParent - Drop all references and remove the node from parent
/// module.
void NamedMDNode::eraseFromParent() {
- getParent()->getNamedMDList().erase(this);
+ getParent()->eraseNamedMetadata(this);
}
/// dropAllReferences - Remove all uses and clear node vector.
@@ -411,22 +419,6 @@ void NamedMDNode::dropAllReferences() {
getNMDOps(Operands).clear();
}
-/// setName - Set the name of this named metadata.
-void NamedMDNode::setName(const Twine &NewName) {
- assert (!NewName.isTriviallyEmpty() && "Invalid named metadata name!");
-
- SmallString<256> NameData;
- StringRef NameRef = NewName.toStringRef(NameData);
-
- // Name isn't changing?
- if (getName() == NameRef)
- return;
-
- Name = NameRef.str();
- if (Parent)
- Parent->getMDSymbolTable().insert(NameRef, this);
-}
-
/// getName - Return a constant reference to this named metadata's name.
StringRef NamedMDNode::getName() const {
return StringRef(Name);
@@ -445,10 +437,6 @@ MDNode *Instruction::getMetadataImpl(const char *Kind) const {
return getMetadataImpl(getContext().getMDKindID(Kind));
}
-void Instruction::setDbgMetadata(MDNode *Node) {
- DbgLoc = DebugLoc::getFromDILocation(Node);
-}
-
/// setMetadata - Set the metadata of the specified kind to the specified
/// node. This updates/replaces metadata if already present, or removes it if
/// Node is null.
@@ -567,13 +555,11 @@ getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
}
-/// removeAllMetadata - Remove all metadata from this instruction.
-void Instruction::removeAllMetadata() {
- assert(hasMetadata() && "Caller should check");
- DbgLoc = DebugLoc();
- if (hasMetadataHashEntry()) {
- getContext().pImpl->MetadataStore.erase(this);
- setHasMetadataHashEntry(false);
- }
+/// clearMetadataHashEntries - Clear all hashtable-based metadata from
+/// this instruction.
+void Instruction::clearMetadataHashEntries() {
+ assert(hasMetadataHashEntry() && "Caller should check");
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
}
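The Metadata.cpp changes make the uniquing key exactly the operand pointer list: the function-local bit is dropped from the FoldingSetNodeID because it is implied by the operands, which is also why getMDNode can now do the FindNodeOrInsertPos lookup before computing function-localness. A toy sketch of pointer-list uniquing (MDNodeStub and the std::map are illustrative stand-ins for MDNode and the FoldingSet):

#include <map>
#include <utility>
#include <vector>

// Two nodes with identical operand lists share one object; nothing else
// participates in the key.
struct MDNodeStub {
  std::vector<void *> Ops;
};

static MDNodeStub *getUniqued(std::map<std::vector<void *>, MDNodeStub *> &Set,
                              const std::vector<void *> &Ops) {
  std::map<std::vector<void *>, MDNodeStub *>::iterator It = Set.find(Ops);
  if (It != Set.end())
    return It->second; // reuse the existing node
  MDNodeStub *N = new MDNodeStub{Ops};
  Set.insert(std::make_pair(Ops, N));
  return N;
}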
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 38a51dfd5d388..d7ddf96cb0700 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -58,10 +58,10 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
//
Module::Module(StringRef MID, LLVMContext& C)
- : Context(C), Materializer(NULL), ModuleID(MID), DataLayout("") {
+ : Context(C), Materializer(NULL), ModuleID(MID) {
ValSymTab = new ValueSymbolTable();
TypeSymTab = new TypeSymbolTable();
- NamedMDSymTab = new MDSymbolTable();
+ NamedMDSymTab = new StringMap<NamedMDNode *>();
}
Module::~Module() {
@@ -73,7 +73,7 @@ Module::~Module() {
NamedMDList.clear();
delete ValSymTab;
delete TypeSymTab;
- delete NamedMDSymTab;
+ delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
}
/// Target endian information...
@@ -316,19 +316,28 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const {
NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
SmallString<256> NameData;
StringRef NameRef = Name.toStringRef(NameData);
- return NamedMDSymTab->lookup(NameRef);
+ return static_cast<StringMap<NamedMDNode*> *>(NamedMDSymTab)->lookup(NameRef);
}
/// getOrInsertNamedMetadata - Return the first named MDNode in the module
/// with the specified name. This method returns a new NamedMDNode if a
/// NamedMDNode with the specified name is not found.
NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
- NamedMDNode *NMD = NamedMDSymTab->lookup(Name);
- if (!NMD)
- NMD = NamedMDNode::Create(getContext(), Name, NULL, 0, this);
+ NamedMDNode *&NMD =
+ (*static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab))[Name];
+ if (!NMD) {
+ NMD = new NamedMDNode(Name);
+ NMD->setParent(this);
+ NamedMDList.push_back(NMD);
+ }
return NMD;
}
+void Module::eraseNamedMetadata(NamedMDNode *NMD) {
+ static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab)->erase(NMD->getName());
+ NamedMDList.erase(NMD);
+}
+
//===----------------------------------------------------------------------===//
// Methods for easy access to the types in the module.
//
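getOrInsertNamedMetadata above relies on StringMap::operator[] returning a reference to a default-constructed (null) slot the first time a name is looked up, so the create-and-register path runs exactly once per name. The same get-or-insert shape with std::map and a hypothetical stub type:

#include <map>
#include <string>

// NamedNodeStub is a hypothetical stand-in for NamedMDNode.
struct NamedNodeStub {
  std::string Name;
};

static NamedNodeStub *getOrInsert(std::map<std::string, NamedNodeStub *> &Tab,
                                  const std::string &Name) {
  NamedNodeStub *&Slot = Tab[Name]; // null on first lookup
  if (!Slot)
    Slot = new NamedNodeStub{Name};
  return Slot;
}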
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index efd98af0f443c..a7d7f61dd7622 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -14,35 +14,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Pass.h"
-#include "llvm/PassManager.h"
-#include "llvm/Module.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/PassRegistry.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Atomic.h"
-#include "llvm/System/Mutex.h"
-#include "llvm/System/Threading.h"
-#include <algorithm>
-#include <map>
-#include <set>
using namespace llvm;
//===----------------------------------------------------------------------===//
// Pass Implementation
//
-Pass::Pass(PassKind K, intptr_t pid) : Resolver(0), PassID(pid), Kind(K) {
- assert(pid && "pid cannot be 0");
-}
-
-Pass::Pass(PassKind K, const void *pid)
- : Resolver(0), PassID((intptr_t)pid), Kind(K) {
- assert(pid && "pid cannot be 0");
-}
+Pass::Pass(PassKind K, char &pid) : Resolver(0), PassID(&pid), Kind(K) { }
// Force out-of-line virtual method.
Pass::~Pass() {
@@ -61,8 +44,8 @@ PassManagerType ModulePass::getPotentialPassManagerType() const {
return PMT_ModulePassManager;
}
-bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const {
- return Resolver->getAnalysisIfAvailable(AnalysisID, true) != 0;
+bool Pass::mustPreserveAnalysisID(char &AID) const {
+ return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
}
// dumpPassStructure - Implement the -debug-passes=Structure option
@@ -75,7 +58,9 @@ void Pass::dumpPassStructure(unsigned Offset) {
/// Registration templates, but can be overloaded directly.
///
const char *Pass::getPassName() const {
- if (const PassInfo *PI = getPassInfo())
+ AnalysisID AID = getPassID();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
+ if (PI)
return PI->getPassName();
return "Unnamed pass: implement Pass::getPassName()";
}
@@ -101,7 +86,7 @@ void Pass::verifyAnalysis() const {
// By default, don't do anything.
}
-void *Pass::getAdjustedAnalysisPointer(const PassInfo *) {
+void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) {
return this;
}
@@ -150,30 +135,6 @@ Pass *FunctionPass::createPrinterPass(raw_ostream &O,
return createPrintFunctionPass(Banner, &O);
}
-// run - On a module, we run this pass by initializing, runOnFunction'ing once
-// for every function in the module, then by finalizing.
-//
-bool FunctionPass::runOnModule(Module &M) {
- bool Changed = doInitialization(M);
-
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (!I->isDeclaration()) // Passes are not run on external functions!
- Changed |= runOnFunction(*I);
-
- return Changed | doFinalization(M);
-}
-
-// run - On a function, we simply initialize, run the function, then finalize.
-//
-bool FunctionPass::run(Function &F) {
- // Passes are not run on external functions!
- if (F.isDeclaration()) return false;
-
- bool Changed = doInitialization(*F.getParent());
- Changed |= runOnFunction(F);
- return Changed | doFinalization(*F.getParent());
-}
-
bool FunctionPass::doInitialization(Module &) {
// By default, don't do anything.
return false;
@@ -199,16 +160,6 @@ Pass *BasicBlockPass::createPrinterPass(raw_ostream &O,
return 0;
}
-// To run this pass on a function, we simply call runOnBasicBlock once for each
-// function.
-//
-bool BasicBlockPass::runOnFunction(Function &F) {
- bool Changed = doInitialization(F);
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- Changed |= runOnBasicBlock(*I);
- return Changed | doFinalization(F);
-}
-
bool BasicBlockPass::doInitialization(Module &) {
// By default, don't do anything.
return false;
@@ -233,161 +184,12 @@ PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
return PMT_BasicBlockPassManager;
}
-//===----------------------------------------------------------------------===//
-// Pass Registration mechanism
-//
-namespace {
-class PassRegistrar {
- /// Guards the contents of this class.
- mutable sys::SmartMutex<true> Lock;
-
- /// PassInfoMap - Keep track of the passinfo object for each registered llvm
- /// pass.
- typedef std::map<intptr_t, const PassInfo*> MapType;
- MapType PassInfoMap;
-
- typedef StringMap<const PassInfo*> StringMapType;
- StringMapType PassInfoStringMap;
-
- /// AnalysisGroupInfo - Keep track of information for each analysis group.
- struct AnalysisGroupInfo {
- std::set<const PassInfo *> Implementations;
- };
-
- /// AnalysisGroupInfoMap - Information for each analysis group.
- std::map<const PassInfo *, AnalysisGroupInfo> AnalysisGroupInfoMap;
-
-public:
-
- const PassInfo *GetPassInfo(intptr_t TI) const {
- sys::SmartScopedLock<true> Guard(Lock);
- MapType::const_iterator I = PassInfoMap.find(TI);
- return I != PassInfoMap.end() ? I->second : 0;
- }
-
- const PassInfo *GetPassInfo(StringRef Arg) const {
- sys::SmartScopedLock<true> Guard(Lock);
- StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
- return I != PassInfoStringMap.end() ? I->second : 0;
- }
-
- void RegisterPass(const PassInfo &PI) {
- sys::SmartScopedLock<true> Guard(Lock);
- bool Inserted =
- PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
- assert(Inserted && "Pass registered multiple times!"); Inserted=Inserted;
- PassInfoStringMap[PI.getPassArgument()] = &PI;
- }
-
- void UnregisterPass(const PassInfo &PI) {
- sys::SmartScopedLock<true> Guard(Lock);
- MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
- assert(I != PassInfoMap.end() && "Pass registered but not in map!");
-
- // Remove pass from the map.
- PassInfoMap.erase(I);
- PassInfoStringMap.erase(PI.getPassArgument());
- }
-
- void EnumerateWith(PassRegistrationListener *L) {
- sys::SmartScopedLock<true> Guard(Lock);
- for (MapType::const_iterator I = PassInfoMap.begin(),
- E = PassInfoMap.end(); I != E; ++I)
- L->passEnumerate(I->second);
- }
-
-
- /// Analysis Group Mechanisms.
- void RegisterAnalysisGroup(PassInfo *InterfaceInfo,
- const PassInfo *ImplementationInfo,
- bool isDefault) {
- sys::SmartScopedLock<true> Guard(Lock);
- AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
- assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
- "Cannot add a pass to the same analysis group more than once!");
- AGI.Implementations.insert(ImplementationInfo);
- if (isDefault) {
- assert(InterfaceInfo->getNormalCtor() == 0 &&
- "Default implementation for analysis group already specified!");
- assert(ImplementationInfo->getNormalCtor() &&
- "Cannot specify pass as default if it does not have a default ctor");
- InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
- }
- }
-};
-}
-
-static std::vector<PassRegistrationListener*> *Listeners = 0;
-static sys::SmartMutex<true> ListenersLock;
-
-static PassRegistrar *PassRegistrarObj = 0;
-static PassRegistrar *getPassRegistrar() {
- // Use double-checked locking to safely initialize the registrar when
- // we're running in multithreaded mode.
- PassRegistrar* tmp = PassRegistrarObj;
- if (llvm_is_multithreaded()) {
- sys::MemoryFence();
- if (!tmp) {
- llvm_acquire_global_lock();
- tmp = PassRegistrarObj;
- if (!tmp) {
- tmp = new PassRegistrar();
- sys::MemoryFence();
- PassRegistrarObj = tmp;
- }
- llvm_release_global_lock();
- }
- } else if (!tmp) {
- PassRegistrarObj = new PassRegistrar();
- }
-
- return PassRegistrarObj;
-}
-
-namespace {
-
-// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown.
-// Unfortunately, passes are registered with static ctors, and having
-// llvm_shutdown clear this map prevents successful ressurection after
-// llvm_shutdown is run. Ideally we should find a solution so that we don't
-// leak the map, AND can still resurrect after shutdown.
-void cleanupPassRegistrar(void*) {
- if (PassRegistrarObj) {
- delete PassRegistrarObj;
- PassRegistrarObj = 0;
- }
-}
-ManagedCleanup<&cleanupPassRegistrar> registrarCleanup ATTRIBUTE_USED;
-
-}
-
-// getPassInfo - Return the PassInfo data structure that corresponds to this
-// pass...
-const PassInfo *Pass::getPassInfo() const {
- return lookupPassInfo(PassID);
-}
-
-const PassInfo *Pass::lookupPassInfo(intptr_t TI) {
- return getPassRegistrar()->GetPassInfo(TI);
+const PassInfo *Pass::lookupPassInfo(const void *TI) {
+ return PassRegistry::getPassRegistry()->getPassInfo(TI);
}
const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
- return getPassRegistrar()->GetPassInfo(Arg);
-}
-
-void PassInfo::registerPass() {
- getPassRegistrar()->RegisterPass(*this);
-
- // Notify any listeners.
- sys::SmartScopedLock<true> Lock(ListenersLock);
- if (Listeners)
- for (std::vector<PassRegistrationListener*>::iterator
- I = Listeners->begin(), E = Listeners->end(); I != E; ++I)
- (*I)->passRegistered(this);
-}
-
-void PassInfo::unregisterPass() {
- getPassRegistrar()->UnregisterPass(*this);
+ return PassRegistry::getPassRegistry()->getPassInfo(Arg);
}
Pass *PassInfo::createPass() const {
@@ -404,32 +206,11 @@ Pass *PassInfo::createPass() const {
// RegisterAGBase implementation
//
-RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
- intptr_t PassID, bool isDefault)
- : PassInfo(Name, InterfaceID) {
-
- PassInfo *InterfaceInfo =
- const_cast<PassInfo*>(Pass::lookupPassInfo(InterfaceID));
- if (InterfaceInfo == 0) {
- // First reference to Interface, register it now.
- registerPass();
- InterfaceInfo = this;
- }
- assert(isAnalysisGroup() &&
- "Trying to join an analysis group that is a normal pass!");
-
- if (PassID) {
- const PassInfo *ImplementationInfo = Pass::lookupPassInfo(PassID);
- assert(ImplementationInfo &&
- "Must register pass before adding to AnalysisGroup!");
-
- // Make sure we keep track of the fact that the implementation implements
- // the interface.
- PassInfo *IIPI = const_cast<PassInfo*>(ImplementationInfo);
- IIPI->addInterfaceImplemented(InterfaceInfo);
-
- getPassRegistrar()->RegisterAnalysisGroup(InterfaceInfo, IIPI, isDefault);
- }
+RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
+ const void *PassID, bool isDefault)
+ : PassInfo(Name, InterfaceID) {
+ PassRegistry::getPassRegistry()->registerAnalysisGroup(InterfaceID, PassID,
+ *this, isDefault);
}
@@ -440,31 +221,19 @@ RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
// PassRegistrationListener ctor - Add the current object to the list of
// PassRegistrationListeners...
PassRegistrationListener::PassRegistrationListener() {
- sys::SmartScopedLock<true> Lock(ListenersLock);
- if (!Listeners) Listeners = new std::vector<PassRegistrationListener*>();
- Listeners->push_back(this);
+ PassRegistry::getPassRegistry()->addRegistrationListener(this);
}
// dtor - Remove object from list of listeners...
PassRegistrationListener::~PassRegistrationListener() {
- sys::SmartScopedLock<true> Lock(ListenersLock);
- std::vector<PassRegistrationListener*>::iterator I =
- std::find(Listeners->begin(), Listeners->end(), this);
- assert(Listeners && I != Listeners->end() &&
- "PassRegistrationListener not registered!");
- Listeners->erase(I);
-
- if (Listeners->empty()) {
- delete Listeners;
- Listeners = 0;
- }
+ PassRegistry::getPassRegistry()->removeRegistrationListener(this);
}
// enumeratePasses - Iterate over the registered passes, calling the
// passEnumerate callback on each PassInfo object.
//
void PassRegistrationListener::enumeratePasses() {
- getPassRegistrar()->EnumerateWith(this);
+ PassRegistry::getPassRegistry()->enumerateWith(this);
}
PassNameParser::~PassNameParser() {}
@@ -481,7 +250,7 @@ namespace {
void passEnumerate(const PassInfo *P) {
if (P->isCFGOnlyPass())
- CFGOnlyList.push_back(P);
+ CFGOnlyList.push_back(P->getTypeInfo());
}
};
}
@@ -501,15 +270,25 @@ void AnalysisUsage::setPreservesCFG() {
GetCFGOnlyPasses(Preserved).enumeratePasses();
}
-AnalysisUsage &AnalysisUsage::addRequiredID(AnalysisID ID) {
- assert(ID && "Pass class not registered!");
- Required.push_back(ID);
+AnalysisUsage &AnalysisUsage::addPreserved(StringRef Arg) {
+ const PassInfo *PI = Pass::lookupPassInfo(Arg);
+ // If the pass exists, preserve it. Otherwise silently do nothing.
+ if (PI) Preserved.push_back(PI->getTypeInfo());
return *this;
}
-AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(AnalysisID ID) {
- assert(ID && "Pass class not registered!");
+AnalysisUsage &AnalysisUsage::addRequiredID(const void *ID) {
Required.push_back(ID);
- RequiredTransitive.push_back(ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredID(char &ID) {
+ Required.push_back(&ID);
+ return *this;
+}
+
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(char &ID) {
+ Required.push_back(&ID);
+ RequiredTransitive.push_back(&ID);
return *this;
}
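The through-line of the Pass.cpp rewrite is switching pass identity from intptr_t type IDs to the address of each pass's static char ID, with all registry bookkeeping delegated to PassRegistry. The address-as-identity trick in isolation (a sketch; MyPassStub is hypothetical, and AnalysisID matches the diff's const void* usage):

// Every pass declares a static char whose *address*, never its value, is the
// pass's unique AnalysisID; distinct statics guarantee distinct addresses.
typedef const void *AnalysisID;

struct MyPassStub {
  static char ID; // value never read; &ID is the identity
};
char MyPassStub::ID = 0;

static bool isSamePass(AnalysisID A, AnalysisID B) { return A == B; }
// e.g. isSamePass(&MyPassStub::ID, SomeOtherPassID)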
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 296b0d13a710b..ab4d4e55c7504 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -7,12 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the LLVM Pass Manager infrastructure.
+// This file implements the LLVM Pass Manager infrastructure.
//
//===----------------------------------------------------------------------===//
#include "llvm/PassManagers.h"
+#include "llvm/PassManager.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CommandLine.h"
@@ -24,8 +25,6 @@
#include "llvm/Support/PassNameParser.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Mutex.h"
-#include "llvm/System/Threading.h"
-#include "llvm-c/Core.h"
#include <algorithm>
#include <cstdio>
#include <map>
@@ -82,30 +81,32 @@ PrintAfterAll("print-after-all",
/// This is a helper to determine whether to print IR before or
/// after a pass.
-static bool ShouldPrintBeforeOrAfterPass(Pass *P,
+static bool ShouldPrintBeforeOrAfterPass(const void *PassID,
PassOptionList &PassesToPrint) {
- for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
- const llvm::PassInfo *PassInf = PassesToPrint[i];
- if (PassInf && P->getPassInfo())
- if (PassInf->getPassArgument() ==
- P->getPassInfo()->getPassArgument()) {
- return true;
- }
+ if (const llvm::PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(PassID)) {
+ for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) {
+ const llvm::PassInfo *PassInf = PassesToPrint[i];
+ if (PassInf)
+ if (PassInf->getPassArgument() == PI->getPassArgument()) {
+ return true;
+ }
+ }
}
return false;
}
-
+
/// This is a utility to check whether a pass should have IR dumped
/// before it.
-static bool ShouldPrintBeforePass(Pass *P) {
- return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(P, PrintBefore);
+static bool ShouldPrintBeforePass(const void *PassID) {
+ return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PassID, PrintBefore);
}
/// This is a utility to check whether a pass should have IR dumped
/// after it.
-static bool ShouldPrintAfterPass(Pass *P) {
- return PrintAfterAll || ShouldPrintBeforeOrAfterPass(P, PrintAfter);
+static bool ShouldPrintAfterPass(const void *PassID) {
+ return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PassID, PrintAfter);
}
} // End of llvm namespace
@@ -124,9 +125,9 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
OS << "Releasing pass '";
else
OS << "Running pass '";
-
+
OS << P->getPassName() << "'";
-
+
if (M) {
OS << " on module '" << M->getModuleIdentifier() << "'.\n";
return;
@@ -162,8 +163,8 @@ class BBPassManager : public PMDataManager, public FunctionPass {
public:
static char ID;
- explicit BBPassManager(int Depth)
- : PMDataManager(Depth), FunctionPass(&ID) {}
+ explicit BBPassManager(int Depth)
+ : PMDataManager(Depth), FunctionPass(ID) {}
/// Execute all of the passes scheduled for execution. Keep track of
/// whether any of the passes modifies the function, and if so, return true.
@@ -202,8 +203,8 @@ public:
return BP;
}
- virtual PassManagerType getPassManagerType() const {
- return PMT_BasicBlockPassManager;
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_BasicBlockPassManager;
}
};
@@ -223,9 +224,9 @@ private:
bool wasRun;
public:
static char ID;
- explicit FunctionPassManagerImpl(int Depth) :
- Pass(PT_PassManager, &ID), PMDataManager(Depth),
- PMTopLevelManager(TLM_Function), wasRun(false) { }
+ explicit FunctionPassManagerImpl(int Depth) :
+ Pass(PT_PassManager, ID), PMDataManager(Depth),
+ PMTopLevelManager(new FPPassManager(1)), wasRun(false) {}
/// add - Add a pass to the queue of passes to run. This passes ownership of
/// the Pass to the PassManager. When the PassManager is destroyed, the pass
@@ -234,8 +235,8 @@ public:
void add(Pass *P) {
schedulePass(P);
}
-
- /// createPrinterPass - Get a function printer pass.
+
+ /// createPrinterPass - Get a function printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
return createPrintFunctionPass(Banner, &O);
}
@@ -251,12 +252,12 @@ public:
/// doInitialization - Run all of the initializers for the function passes.
///
bool doInitialization(Module &M);
-
+
/// doFinalization - Run all of the finalizers for the function passes.
///
bool doFinalization(Module &M);
-
+
virtual PMDataManager *getAsPMDataManager() { return this; }
virtual Pass *getAsPass() { return this; }
@@ -265,7 +266,7 @@ public:
Info.setPreservesAll();
}
- inline void addTopLevelPass(Pass *P) {
+ void addTopLevelPass(Pass *P) {
if (ImmutablePass *IP = P->getAsImmutablePass()) {
// P is an immutable pass and it will be managed by this
// top level manager. Set up analysis resolver to connect them.
@@ -288,6 +289,7 @@ public:
};
char FunctionPassManagerImpl::ID = 0;
+
//===----------------------------------------------------------------------===//
// MPPassManager
//
@@ -298,11 +300,11 @@ class MPPassManager : public Pass, public PMDataManager {
public:
static char ID;
explicit MPPassManager(int Depth) :
- Pass(PT_PassManager, &ID), PMDataManager(Depth) { }
+ Pass(PT_PassManager, ID), PMDataManager(Depth) { }
// Delete on the fly managers.
virtual ~MPPassManager() {
- for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+ for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
I != E; ++I) {
FunctionPassManagerImpl *FPP = I->second;
@@ -310,7 +312,7 @@ public:
}
}
- /// createPrinterPass - Get a module printer pass.
+ /// createPrinterPass - Get a module printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
return createPrintModulePass(&O, false, Banner);
}
@@ -329,10 +331,10 @@ public:
/// through getAnalysis interface.
virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
- /// Return function pass corresponding to PassInfo PI, that is
+ /// Return function pass corresponding to PassInfo PI, that is
/// required by module pass MP. Instantiate the analysis pass by using
/// its runOnFunction() for function F.
- virtual Pass* getOnTheFlyPass(Pass *MP, const PassInfo *PI, Function &F);
+ virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F);
virtual const char *getPassName() const {
return "Module Pass Manager";
@@ -360,8 +362,8 @@ public:
return static_cast<ModulePass *>(PassVector[N]);
}
- virtual PassManagerType getPassManagerType() const {
- return PMT_ModulePassManager;
+ virtual PassManagerType getPassManagerType() const {
+ return PMT_ModulePassManager;
}
private:
@@ -383,8 +385,8 @@ class PassManagerImpl : public Pass,
public:
static char ID;
explicit PassManagerImpl(int Depth) :
- Pass(PT_PassManager, &ID), PMDataManager(Depth),
- PMTopLevelManager(TLM_Pass) { }
+ Pass(PT_PassManager, ID), PMDataManager(Depth),
+ PMTopLevelManager(new MPPassManager(1)) {}
/// add - Add a pass to the queue of passes to run. This passes ownership of
/// the Pass to the PassManager. When the PassManager is destroyed, the pass
@@ -393,8 +395,8 @@ public:
void add(Pass *P) {
schedulePass(P);
}
-
- /// createPrinterPass - Get a module printer pass.
+
+ /// createPrinterPass - Get a module printer pass.
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const {
return createPrintModulePass(&O, false, Banner);
}
@@ -408,7 +410,7 @@ public:
Info.setPreservesAll();
}
- inline void addTopLevelPass(Pass *P) {
+ void addTopLevelPass(Pass *P) {
if (ImmutablePass *IP = P->getAsImmutablePass()) {
// P is an immutable pass and it will be managed by this
// top level manager. Set up analysis resolver to connect them.
@@ -451,7 +453,7 @@ class TimingInfo {
public:
// Use 'create' member to get this.
TimingInfo() : TG("... Pass execution timing report ...") {}
-
+
// TimingDtor - Print out information about timing information
~TimingInfo() {
// Delete all of the timers, which accumulate their info into the
@@ -469,7 +471,7 @@ public:
/// getPassTimer - Return the timer for the specified pass if it exists.
Timer *getPassTimer(Pass *P) {
- if (P->getAsPMDataManager())
+ if (P->getAsPMDataManager())
return 0;
sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
@@ -488,28 +490,20 @@ static TimingInfo *TheTimeInfo;
// PMTopLevelManager implementation
/// Initialize top level manager. Create first pass manager.
-PMTopLevelManager::PMTopLevelManager(enum TopLevelManagerType t) {
- if (t == TLM_Pass) {
- MPPassManager *MPP = new MPPassManager(1);
- MPP->setTopLevelManager(this);
- addPassManager(MPP);
- activeStack.push(MPP);
- } else if (t == TLM_Function) {
- FPPassManager *FPP = new FPPassManager(1);
- FPP->setTopLevelManager(this);
- addPassManager(FPP);
- activeStack.push(FPP);
- }
+PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) {
+ PMDM->setTopLevelManager(this);
+ addPassManager(PMDM);
+ activeStack.push(PMDM);
}
/// Set pass P as the last user of the given analysis passes.
-void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
+void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
Pass *P) {
for (SmallVector<Pass *, 12>::iterator I = AnalysisPasses.begin(),
E = AnalysisPasses.end(); I != E; ++I) {
Pass *AP = *I;
LastUser[AP] = P;
-
+
if (P == AP)
continue;
@@ -528,7 +522,7 @@ void PMTopLevelManager::setLastUser(SmallVector<Pass *, 12> &AnalysisPasses,
/// Collect passes whose last user is P
void PMTopLevelManager::collectLastUses(SmallVector<Pass *, 12> &LastUses,
Pass *P) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI =
InversedLastUser.find(P);
if (DMI == InversedLastUser.end())
return;
@@ -544,7 +538,7 @@ void PMTopLevelManager::collectLastUses(SmallVector<Pass *, 12> &LastUses,
AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
AnalysisUsage *AnUsage = NULL;
DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
- if (DMI != AnUsageMap.end())
+ if (DMI != AnUsageMap.end())
AnUsage = DMI->second;
else {
AnUsage = new AnalysisUsage();
@@ -568,8 +562,9 @@ void PMTopLevelManager::schedulePass(Pass *P) {
// If P is an analysis pass and it is available then do not
// generate the analysis again. Stale analysis info should not be
// available at this point.
- if (P->getPassInfo() &&
- P->getPassInfo()->isAnalysis() && findAnalysisPass(P->getPassInfo())) {
+ const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo(P->getPassID());
+ if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) {
delete P;
return;
}
@@ -579,14 +574,15 @@ void PMTopLevelManager::schedulePass(Pass *P) {
bool checkAnalysis = true;
while (checkAnalysis) {
checkAnalysis = false;
-
+
const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(),
E = RequiredSet.end(); I != E; ++I) {
-
+
Pass *AnalysisPass = findAnalysisPass(*I);
if (!AnalysisPass) {
- AnalysisPass = (*I)->createPass();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ AnalysisPass = PI->createPass();
if (P->getPotentialPassManagerType () ==
AnalysisPass->getPotentialPassManagerType())
// Schedule analysis pass that is managed by the same pass manager.
@@ -595,12 +591,12 @@ void PMTopLevelManager::schedulePass(Pass *P) {
AnalysisPass->getPotentialPassManagerType()) {
// Schedule analysis pass that is managed by a new manager.
schedulePass(AnalysisPass);
- // Recheck analysis passes to ensure that required analysises that
+ // Recheck analysis passes to ensure that required analyses that
// are already checked are still available.
checkAnalysis = true;
}
else
- // Do not schedule this analysis. Lower level analsyis
+ // Do not schedule this analysis. Lower level analysis
// passes are run on the fly.
delete AnalysisPass;
}
@@ -632,16 +628,21 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
for (SmallVector<ImmutablePass *, 8>::iterator I = ImmutablePasses.begin(),
E = ImmutablePasses.end(); P == NULL && I != E; ++I) {
- const PassInfo *PI = (*I)->getPassInfo();
+ AnalysisID PI = (*I)->getPassID();
if (PI == AID)
P = *I;
// If Pass not found then check the interfaces implemented by Immutable Pass
if (!P) {
+ const PassInfo *PassInf =
+ PassRegistry::getPassRegistry()->getPassInfo(PI);
const std::vector<const PassInfo*> &ImmPI =
- PI->getInterfacesImplemented();
- if (std::find(ImmPI.begin(), ImmPI.end(), AID) != ImmPI.end())
- P = *I;
+ PassInf->getInterfacesImplemented();
+ for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(),
+ EE = ImmPI.end(); II != EE; ++II) {
+ if ((*II)->getTypeInfo() == AID)
+ P = *I;
+ }
}
}
@@ -658,7 +659,7 @@ void PMTopLevelManager::dumpPasses() const {
for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) {
ImmutablePasses[i]->dumpPassStructure(0);
}
-
+
// Every class that derives from PMDataManager also derives from Pass
// (sometimes indirectly), but there's no inheritance relationship
// between PMDataManager and Pass, so we have to getAsPass to get
@@ -684,15 +685,16 @@ void PMTopLevelManager::initializeAllAnalysisInfo() {
for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
E = PassManagers.end(); I != E; ++I)
(*I)->initializeAnalysisInfo();
-
+
// Initialize other pass managers
- for (SmallVector<PMDataManager *, 8>::iterator I = IndirectPassManagers.begin(),
- E = IndirectPassManagers.end(); I != E; ++I)
+ for (SmallVector<PMDataManager *, 8>::iterator
+ I = IndirectPassManagers.begin(), E = IndirectPassManagers.end();
+ I != E; ++I)
(*I)->initializeAnalysisInfo();
for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
DME = LastUser.end(); DMI != DME; ++DMI) {
- DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
+ DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI =
InversedLastUser.find(DMI->second);
if (InvDMI != InversedLastUser.end()) {
SmallPtrSet<Pass *, 8> &L = InvDMI->second;
@@ -709,7 +711,7 @@ PMTopLevelManager::~PMTopLevelManager() {
for (SmallVector<PMDataManager *, 8>::iterator I = PassManagers.begin(),
E = PassManagers.end(); I != E; ++I)
delete *I;
-
+
for (SmallVector<ImmutablePass *, 8>::iterator
I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I)
delete *I;
@@ -724,16 +726,19 @@ PMTopLevelManager::~PMTopLevelManager() {
/// Augment AvailableAnalysis by adding analysis made available by pass P.
void PMDataManager::recordAvailableAnalysis(Pass *P) {
- const PassInfo *PI = P->getPassInfo();
- if (PI == 0) return;
-
+ AnalysisID PI = P->getPassID();
+
AvailableAnalysis[PI] = P;
- //This pass is the current implementation of all of the interfaces it
- //implements as well.
- const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented();
+ assert(!AvailableAnalysis.empty());
+
+ // This pass is the current implementation of all of the interfaces it
+ // implements as well.
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
+ if (PInf == 0) return;
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
for (unsigned i = 0, e = II.size(); i != e; ++i)
- AvailableAnalysis[II[i]] = P;
+ AvailableAnalysis[II[i]->getTypeInfo()] = P;
}
// Return true if P preserves high level analysis used by other
@@ -742,18 +747,18 @@ bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
if (AnUsage->getPreservesAll())
return true;
-
+
const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
for (SmallVector<Pass *, 8>::iterator I = HigherLevelAnalysis.begin(),
E = HigherLevelAnalysis.end(); I != E; ++I) {
Pass *P1 = *I;
if (P1->getAsImmutablePass() == 0 &&
std::find(PreservedSet.begin(), PreservedSet.end(),
- P1->getPassInfo()) ==
+ P1->getPassID()) ==
PreservedSet.end())
return false;
}
-
+
return true;
}
@@ -788,7 +793,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
E = AvailableAnalysis.end(); I != E; ) {
std::map<AnalysisID, Pass*>::iterator Info = I++;
if (Info->second->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
PreservedSet.end()) {
// Remove this analysis
if (PassDebugging >= Details) {
@@ -807,12 +812,12 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
if (!InheritedAnalysis[Index])
continue;
- for (std::map<AnalysisID, Pass*>::iterator
+ for (std::map<AnalysisID, Pass*>::iterator
I = InheritedAnalysis[Index]->begin(),
E = InheritedAnalysis[Index]->end(); I != E; ) {
std::map<AnalysisID, Pass *>::iterator Info = I++;
if (Info->second->getAsImmutablePass() == 0 &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
+ std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
PreservedSet.end()) {
// Remove this analysis
if (PassDebugging >= Details) {
@@ -861,23 +866,24 @@ void PMDataManager::freePass(Pass *P, StringRef Msg,
P->releaseMemory();
}
- if (const PassInfo *PI = P->getPassInfo()) {
+ AnalysisID PI = P->getPassID();
+ if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) {
// Remove the pass itself (if it is not already removed).
AvailableAnalysis.erase(PI);
// Remove all interfaces this pass implements, for which it is also
// listed as the available implementation.
- const std::vector<const PassInfo*> &II = PI->getInterfacesImplemented();
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
for (unsigned i = 0, e = II.size(); i != e; ++i) {
std::map<AnalysisID, Pass*>::iterator Pos =
- AvailableAnalysis.find(II[i]);
+ AvailableAnalysis.find(II[i]->getTypeInfo());
if (Pos != AvailableAnalysis.end() && Pos->second == P)
AvailableAnalysis.erase(Pos);
}
}
}
-/// Add pass P into the PassVector. Update
+/// Add pass P into the PassVector. Update
/// AvailableAnalysis appropriately if ProcessAnalysis is true.
void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
// This manager is going to manage pass P. Set up analysis resolver
@@ -902,7 +908,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
unsigned PDepth = this->getDepth();
- collectRequiredAnalysis(RequiredPasses,
+ collectRequiredAnalysis(RequiredPasses,
ReqAnalysisNotAvailable, P);
for (SmallVector<Pass *, 8>::iterator I = RequiredPasses.begin(),
E = RequiredPasses.end(); I != E; ++I) {
@@ -920,7 +926,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
TransferLastUses.push_back(PRequired);
// Keep track of higher level analysis used by this manager.
HigherLevelAnalysis.push_back(PRequired);
- } else
+ } else
llvm_unreachable("Unable to accomodate Required Pass");
}
@@ -937,11 +943,12 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
TransferLastUses.clear();
}
- // Now, take care of required analysises that are not available.
- for (SmallVector<AnalysisID, 8>::iterator
- I = ReqAnalysisNotAvailable.begin(),
+ // Now, take care of required analyses that are not available.
+ for (SmallVector<AnalysisID, 8>::iterator
+ I = ReqAnalysisNotAvailable.begin(),
E = ReqAnalysisNotAvailable.end() ;I != E; ++I) {
- Pass *AnalysisPass = (*I)->createPass();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
+ Pass *AnalysisPass = PI->createPass();
this->addLowerLevelRequiredPass(P, AnalysisPass);
}
@@ -963,10 +970,10 @@ void PMDataManager::collectRequiredAnalysis(SmallVector<Pass *, 8>&RP,
Pass *P) {
AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P);
const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet();
- for (AnalysisUsage::VectorType::const_iterator
+ for (AnalysisUsage::VectorType::const_iterator
I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) {
if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
+ RP.push_back(AnalysisPass);
else
RP_NotAvail.push_back(*I);
}
@@ -975,7 +982,7 @@ void PMDataManager::collectRequiredAnalysis(SmallVector<Pass *, 8>&RP,
for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(),
E = IDs.end(); I != E; ++I) {
if (Pass *AnalysisPass = findAnalysisPass(*I, true))
- RP.push_back(AnalysisPass);
+ RP.push_back(AnalysisPass);
else
RP_NotAvail.push_back(*I);
}
@@ -1016,7 +1023,7 @@ Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
// Search Parents through TopLevelManager
if (SearchParent)
return TPM->findAnalysisPass(AID);
-
+
return NULL;
}
@@ -1030,7 +1037,7 @@ void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{
return;
TPM->collectLastUses(LUses, P);
-
+
for (SmallVector<Pass *, 12>::iterator I = LUses.begin(),
E = LUses.end(); I != E; ++I) {
llvm::dbgs() << "--" << std::string(Offset*2, ' ');
@@ -1044,7 +1051,8 @@ void PMDataManager::dumpPassArguments() const {
if (PMDataManager *PMD = (*I)->getAsPMDataManager())
PMD->dumpPassArguments();
else
- if (const PassInfo *PI = (*I)->getPassInfo())
+ if (const PassInfo *PI =
+ PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID()))
if (!PI->isAnalysisGroup())
dbgs() << " -" << PI->getPassArgument();
}
@@ -1093,7 +1101,7 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
void PMDataManager::dumpRequiredSet(const Pass *P) const {
if (PassDebugging < Details)
return;
-
+
AnalysisUsage analysisUsage;
P->getAnalysisUsage(analysisUsage);
dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet());
@@ -1102,7 +1110,7 @@ void PMDataManager::dumpRequiredSet(const Pass *P) const {
void PMDataManager::dumpPreservedSet(const Pass *P) const {
if (PassDebugging < Details)
return;
-
+
AnalysisUsage analysisUsage;
P->getAnalysisUsage(analysisUsage);
dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
@@ -1116,7 +1124,8 @@ void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
dbgs() << (void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:";
for (unsigned i = 0; i != Set.size(); ++i) {
if (i) dbgs() << ',';
- dbgs() << ' ' << Set[i]->getPassName();
+ const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]);
+ dbgs() << ' ' << PInf->getPassName();
}
dbgs() << '\n';
}
@@ -1131,14 +1140,14 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
TPM->dumpPasses();
}
- // Module Level pass may required Function Level analysis info
- // (e.g. dominator info). Pass manager uses on the fly function pass manager
- // to provide this on demand. In that case, in Pass manager terminology,
+  // Module Level pass may require Function Level analysis info
+  // (e.g. dominator info). Pass manager uses an on-the-fly function pass
+  // manager to provide this on demand. In that case, in Pass manager terminology,
// module level pass is requiring lower level analysis info managed by
// lower level pass manager.
// When Pass manager is not able to order required analysis info, Pass manager
- // checks whether any lower level manager will be able to provide this
+ // checks whether any lower level manager will be able to provide this
// analysis info on demand or not.
#ifndef NDEBUG
dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
@@ -1147,7 +1156,7 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
llvm_unreachable("Unable to schedule pass");
}
-Pass *PMDataManager::getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F) {
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
assert(0 && "Unable to find on the fly pass");
return NULL;
}
@@ -1166,7 +1175,7 @@ Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
return PM.findAnalysisPass(ID, dir);
}
-Pass *AnalysisResolver::findImplPass(Pass *P, const PassInfo *AnalysisPI,
+Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
Function &F) {
return PM.getOnTheFlyPass(P, AnalysisPI, F);
}
@@ -1174,8 +1183,8 @@ Pass *AnalysisResolver::findImplPass(Pass *P, const PassInfo *AnalysisPI,
//===----------------------------------------------------------------------===//
// BBPassManager implementation
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnBasicBlock method. Keep track of whether any of the passes modifies
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnBasicBlock method. Keep track of whether any of the passes modifies
/// the function, and if so, return true.
bool BBPassManager::runOnFunction(Function &F) {
if (F.isDeclaration())
@@ -1202,7 +1211,7 @@ bool BBPassManager::runOnFunction(Function &F) {
}
Changed |= LocalChanged;
- if (LocalChanged)
+ if (LocalChanged)
dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
I->getName());
dumpPreservedSet(BP);
@@ -1286,17 +1295,18 @@ void FunctionPassManager::addImpl(Pass *P) {
/// PassManager_X is destroyed, the pass will be destroyed as well, so
/// there is no need to delete the pass. (TODO delete passes.)
/// This implies that all passes MUST be allocated with 'new'.
-void FunctionPassManager::add(Pass *P) {
+void FunctionPassManager::add(Pass *P) {
// If this is a not a function pass, don't add a printer for it.
+ const void *PassID = P->getPassID();
if (P->getPassKind() == PT_Function)
- if (ShouldPrintBeforePass(P))
+ if (ShouldPrintBeforePass(PassID))
addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ P->getPassName() + " ***"));
addImpl(P);
if (P->getPassKind() == PT_Function)
- if (ShouldPrintAfterPass(P))
+ if (ShouldPrintAfterPass(PassID))
addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ P->getPassName() + " ***"));
}
@@ -1405,8 +1415,8 @@ void FPPassManager::dumpPassStructure(unsigned Offset) {
}
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnFunction method. Keep track of whether any of the passes modifies
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnFunction method. Keep track of whether any of the passes modifies
/// the function, and if so, return true.
bool FPPassManager::runOnFunction(Function &F) {
if (F.isDeclaration())
@@ -1476,8 +1486,8 @@ bool FPPassManager::doFinalization(Module &M) {
//===----------------------------------------------------------------------===//
// MPPassManager implementation
-/// Execute all of the passes scheduled for execution by invoking
-/// runOnModule method. Keep track of whether any of the passes modifies
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnModule method. Keep track of whether any of the passes modifies
/// the module, and if so, return true.
bool
MPPassManager::runOnModule(Module &M) {
@@ -1512,7 +1522,7 @@ MPPassManager::runOnModule(Module &M) {
dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
M.getModuleIdentifier());
dumpPreservedSet(MP);
-
+
verifyPreservedAnalysis(MP);
removeNotPreservedAnalysis(MP);
recordAvailableAnalysis(MP);
@@ -1538,7 +1548,7 @@ MPPassManager::runOnModule(Module &M) {
void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
"Unable to handle Pass that requires lower level Analysis pass");
- assert((P->getPotentialPassManagerType() <
+ assert((P->getPotentialPassManagerType() <
RequiredPass->getPotentialPassManagerType()) &&
"Unable to handle Pass that requires lower level Analysis pass");
@@ -1558,13 +1568,13 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
FPP->setLastUser(LU, P);
}
-/// Return function pass corresponding to PassInfo PI, that is
+/// Return function pass corresponding to PassInfo PI, that is
/// required by module pass MP. Instantiate analysis pass, by using
/// its runOnFunction() for function F.
-Pass* MPPassManager::getOnTheFlyPass(Pass *MP, const PassInfo *PI, Function &F){
+Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){
FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
assert(FPP && "Unable to find on the fly pass");
-
+
FPP->releaseMemoryOnTheFly();
FPP->run(F);
return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
@@ -1614,13 +1624,14 @@ void PassManager::addImpl(Pass *P) {
/// will be destroyed as well, so there is no need to delete the pass. This
/// implies that all passes MUST be allocated with 'new'.
void PassManager::add(Pass *P) {
- if (ShouldPrintBeforePass(P))
+ const void* PassID = P->getPassID();
+ if (ShouldPrintBeforePass(PassID))
addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ P->getPassName() + " ***"));
addImpl(P);
- if (ShouldPrintAfterPass(P))
+ if (ShouldPrintAfterPass(PassID))
addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ P->getPassName() + " ***"));
}
@@ -1656,7 +1667,7 @@ void TimingInfo::createTheTimeInfo() {
/// If TimingInfo is enabled then start pass timer.
Timer *llvm::getPassTimer(Pass *P) {
- if (TheTimeInfo)
+ if (TheTimeInfo)
return TheTimeInfo->getPassTimer(P);
return 0;
}
@@ -1690,8 +1701,8 @@ void PMStack::push(PMDataManager *PM) {
}
// Dump content of the pass manager stack.
-void PMStack::dump() {
- for (std::deque<PMDataManager *>::iterator I = S.begin(),
+void PMStack::dump() const {
+ for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
E = S.end(); I != E; ++I)
printf("%s ", (*I)->getAsPass()->getPassName());
@@ -1700,11 +1711,11 @@ void PMStack::dump() {
}
/// Find appropriate Module Pass Manager in the PM Stack and
-/// add self into that manager.
-void ModulePass::assignPassManager(PMStack &PMS,
+/// add self into that manager.
+void ModulePass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
// Find Module Pass Manager
- while(!PMS.empty()) {
+ while (!PMS.empty()) {
PassManagerType TopPMType = PMS.top()->getPassManagerType();
if (TopPMType == PreferredType)
break; // We found desired pass manager
@@ -1718,7 +1729,7 @@ void ModulePass::assignPassManager(PMStack &PMS,
}
/// Find appropriate Function Pass Manager or Call Graph Pass Manager
-/// in the PM Stack and add self into that manager.
+/// in the PM Stack and add self into that manager.
void FunctionPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
@@ -1727,7 +1738,7 @@ void FunctionPass::assignPassManager(PMStack &PMS,
if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
PMS.pop();
else
- break;
+ break;
}
// Create new Function Pass Manager if needed.
@@ -1759,14 +1770,14 @@ void FunctionPass::assignPassManager(PMStack &PMS,
}
/// Find appropriate Basic Pass Manager or Call Graph Pass Manager
-/// in the PM Stack and add self into that manager.
+/// in the PM Stack and add self into that manager.
void BasicBlockPass::assignPassManager(PMStack &PMS,
PassManagerType PreferredType) {
BBPassManager *BBP;
// Basic Pass Manager is a leaf pass manager. It does not handle
// any other pass manager.
- if (!PMS.empty() &&
+ if (!PMS.empty() &&
PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
BBP = (BBPassManager *)PMS.top();
} else {
@@ -1796,38 +1807,3 @@ void BasicBlockPass::assignPassManager(PMStack &PMS,
}
PassManagerBase::~PassManagerBase() {}
-
-/*===-- C Bindings --------------------------------------------------------===*/
-
-LLVMPassManagerRef LLVMCreatePassManager() {
- return wrap(new PassManager());
-}
-
-LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
- return wrap(new FunctionPassManager(unwrap(M)));
-}
-
-LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
- return LLVMCreateFunctionPassManagerForModule(
- reinterpret_cast<LLVMModuleRef>(P));
-}
-
-LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
- return unwrap<PassManager>(PM)->run(*unwrap(M));
-}
-
-LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM) {
- return unwrap<FunctionPassManager>(FPM)->doInitialization();
-}
-
-LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F) {
- return unwrap<FunctionPassManager>(FPM)->run(*unwrap<Function>(F));
-}
-
-LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM) {
- return unwrap<FunctionPassManager>(FPM)->doFinalization();
-}
-
-void LLVMDisposePassManager(LLVMPassManagerRef PM) {
- delete unwrap(PM);
-}
diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp
new file mode 100644
index 0000000000000..21dba56aad728
--- /dev/null
+++ b/lib/VMCore/PassRegistry.cpp
@@ -0,0 +1,159 @@
+//===- PassRegistry.cpp - Pass Registration Implementation ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PassRegistry, with which passes are registered on
+// initialization, and which supports the PassManager in dependency resolution.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+
+static PassRegistry *PassRegistryObj = 0;
+PassRegistry *PassRegistry::getPassRegistry() {
+ // Use double-checked locking to safely initialize the registrar when
+ // we're running in multithreaded mode.
+ PassRegistry* tmp = PassRegistryObj;
+ if (llvm_is_multithreaded()) {
+ sys::MemoryFence();
+ if (!tmp) {
+ llvm_acquire_global_lock();
+ tmp = PassRegistryObj;
+ if (!tmp) {
+ tmp = new PassRegistry();
+ sys::MemoryFence();
+ PassRegistryObj = tmp;
+ }
+ llvm_release_global_lock();
+ }
+ } else if (!tmp) {
+ PassRegistryObj = new PassRegistry();
+ }
+
+ return PassRegistryObj;
+}
+
+namespace {
+
+// FIXME: We use ManagedCleanup to erase the pass registrar on shutdown.
+// Unfortunately, passes are registered with static ctors, and having
+// llvm_shutdown clear this map prevents successful resurrection after
+// llvm_shutdown is run. Ideally we should find a solution so that we don't
+// leak the map, AND can still resurrect after shutdown.
+void cleanupPassRegistry(void*) {
+ if (PassRegistryObj) {
+ delete PassRegistryObj;
+ PassRegistryObj = 0;
+ }
+}
+ManagedCleanup<&cleanupPassRegistry> registryCleanup ATTRIBUTE_USED;
+
+}
+
+const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
+ sys::SmartScopedLock<true> Guard(Lock);
+ MapType::const_iterator I = PassInfoMap.find(TI);
+ return I != PassInfoMap.end() ? I->second : 0;
+}
+
+const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
+ sys::SmartScopedLock<true> Guard(Lock);
+ StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
+ return I != PassInfoStringMap.end() ? I->second : 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Registration mechanism
+//
+
+void PassRegistry::registerPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ bool Inserted =
+ PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
+  assert(Inserted && "Pass registered multiple times!");
+  (void)Inserted;  // Referenced so release builds do not warn about it.
+ PassInfoStringMap[PI.getPassArgument()] = &PI;
+
+ // Notify any listeners.
+ for (std::vector<PassRegistrationListener*>::iterator
+ I = Listeners.begin(), E = Listeners.end(); I != E; ++I)
+ (*I)->passRegistered(&PI);
+}
+
+void PassRegistry::unregisterPass(const PassInfo &PI) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ MapType::iterator I = PassInfoMap.find(PI.getTypeInfo());
+ assert(I != PassInfoMap.end() && "Pass registered but not in map!");
+
+ // Remove pass from the map.
+ PassInfoMap.erase(I);
+ PassInfoStringMap.erase(PI.getPassArgument());
+}
+
+void PassRegistry::enumerateWith(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ for (MapType::const_iterator I = PassInfoMap.begin(),
+ E = PassInfoMap.end(); I != E; ++I)
+ L->passEnumerate(I->second);
+}
+
+
+/// Analysis Group Mechanisms.
+void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
+ const void *PassID,
+ PassInfo& Registeree,
+ bool isDefault) {
+ PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
+ if (InterfaceInfo == 0) {
+ // First reference to Interface, register it now.
+ registerPass(Registeree);
+ InterfaceInfo = &Registeree;
+ }
+ assert(Registeree.isAnalysisGroup() &&
+ "Trying to join an analysis group that is a normal pass!");
+
+ if (PassID) {
+ PassInfo *ImplementationInfo = const_cast<PassInfo*>(getPassInfo(PassID));
+ assert(ImplementationInfo &&
+ "Must register pass before adding to AnalysisGroup!");
+
+ // Make sure we keep track of the fact that the implementation implements
+ // the interface.
+ ImplementationInfo->addInterfaceImplemented(InterfaceInfo);
+
+ sys::SmartScopedLock<true> Guard(Lock);
+ AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo];
+ assert(AGI.Implementations.count(ImplementationInfo) == 0 &&
+ "Cannot add a pass to the same analysis group more than once!");
+ AGI.Implementations.insert(ImplementationInfo);
+ if (isDefault) {
+ assert(InterfaceInfo->getNormalCtor() == 0 &&
+ "Default implementation for analysis group already specified!");
+ assert(ImplementationInfo->getNormalCtor() &&
+ "Cannot specify pass as default if it does not have a default ctor");
+ InterfaceInfo->setNormalCtor(ImplementationInfo->getNormalCtor());
+ }
+ }
+}
+
+void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ Listeners.push_back(L);
+}
+
+void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
+ sys::SmartScopedLock<true> Guard(Lock);
+ std::vector<PassRegistrationListener*>::iterator I =
+ std::find(Listeners.begin(), Listeners.end(), L);
+ assert(I != Listeners.end() && "PassRegistrationListener not registered!");
+ Listeners.erase(I);
+}
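// How clients consume the new registry: look a pass up by its ID address or
// by its command-line argument, then instantiate it through its PassInfo.
// A minimal sketch ("mypass" is a hypothetical argument string):
#include "llvm/PassRegistry.h"
#include "llvm/PassSupport.h"
using namespace llvm;

Pass *instantiateByArgument() {
  PassRegistry *Registry = PassRegistry::getPassRegistry();
  if (const PassInfo *PI = Registry->getPassInfo("mypass"))
    return PI->createPass();   // uses the ctor recorded at registration
  return 0;                    // not registered under that argument
}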
diff --git a/lib/VMCore/PrintModulePass.cpp b/lib/VMCore/PrintModulePass.cpp
index 2d69dce07f3f7..2ee49d235963c 100644
--- a/lib/VMCore/PrintModulePass.cpp
+++ b/lib/VMCore/PrintModulePass.cpp
@@ -28,10 +28,10 @@ namespace {
bool DeleteStream; // Delete the ostream in our dtor?
public:
static char ID;
- PrintModulePass() : ModulePass(&ID), Out(&dbgs()),
+ PrintModulePass() : ModulePass(ID), Out(&dbgs()),
DeleteStream(false) {}
PrintModulePass(const std::string &B, raw_ostream *o, bool DS)
- : ModulePass(&ID), Banner(B), Out(o), DeleteStream(DS) {}
+ : ModulePass(ID), Banner(B), Out(o), DeleteStream(DS) {}
~PrintModulePass() {
if (DeleteStream) delete Out;
@@ -53,12 +53,12 @@ namespace {
bool DeleteStream; // Delete the ostream in our dtor?
public:
static char ID;
- PrintFunctionPass() : FunctionPass(&ID), Banner(""), Out(&dbgs()),
+ PrintFunctionPass() : FunctionPass(ID), Banner(""), Out(&dbgs()),
DeleteStream(false) {}
PrintFunctionPass(const std::string &B, raw_ostream *o, bool DS)
- : FunctionPass(&ID), Banner(B), Out(o), DeleteStream(DS) {}
+ : FunctionPass(ID), Banner(B), Out(o), DeleteStream(DS) {}
- inline ~PrintFunctionPass() {
+ ~PrintFunctionPass() {
if (DeleteStream) delete Out;
}
@@ -77,11 +77,11 @@ namespace {
}
char PrintModulePass::ID = 0;
-static RegisterPass<PrintModulePass>
-X("print-module", "Print module to stderr");
+INITIALIZE_PASS(PrintModulePass, "print-module",
+ "Print module to stderr", false, false);
char PrintFunctionPass::ID = 0;
-static RegisterPass<PrintFunctionPass>
-Y("print-function","Print function to stderr");
+INITIALIZE_PASS(PrintFunctionPass, "print-function",
+ "Print function to stderr", false, false);
/// createPrintModulePass - Create and return a pass that writes the
/// module to the specified raw_ostream.
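// The two hunks above show the mechanical conversion applied throughout this
// patch: pass constructors now take the ID by reference (ModulePass(ID), not
// ModulePass(&ID)), and static RegisterPass<> objects become INITIALIZE_PASS.
// A sketch of the new-style boilerplate for a hypothetical pass:
#include "llvm/Pass.h"
#include "llvm/PassSupport.h"
using namespace llvm;

namespace {
  struct HelloPass : public ModulePass {
    static char ID;                  // &ID serves as the unique AnalysisID
    HelloPass() : ModulePass(ID) {}
    virtual bool runOnModule(Module &M) { return false; }
  };
}
char HelloPass::ID = 0;
INITIALIZE_PASS(HelloPass, "hello", "Hello pass", false, false);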
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 845b523c24216..c55e6267836ac 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -50,7 +50,7 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) {
/// Because of the way Type subclasses are allocated, this function is necessary
/// to use the correct kind of "delete" operator to deallocate the Type object.
-/// Some type objects (FunctionTy, StructTy, UnionTy) allocate additional space
+/// Some type objects (FunctionTy, StructTy) allocate additional space
/// after the space for their derived type to hold the contained types array of
/// PATypeHandles. Using this allocation scheme means all the PATypeHandles are
/// allocated with the type object, decreasing allocations and eliminating the
@@ -66,8 +66,7 @@ void Type::destroy() const {
// Structures and Functions allocate their contained types past the end of
// the type object itself. These need to be destroyed differently than the
// other types.
- if (this->isFunctionTy() || this->isStructTy() ||
- this->isUnionTy()) {
+ if (this->isFunctionTy() || this->isStructTy()) {
// First, make sure we destruct any PATypeHandles allocated by these
// subclasses. They must be manually destructed.
for (unsigned i = 0; i < NumContainedTys; ++i)
@@ -77,10 +76,10 @@ void Type::destroy() const {
// to delete this as an array of char.
if (this->isFunctionTy())
static_cast<const FunctionType*>(this)->FunctionType::~FunctionType();
- else if (this->isStructTy())
+ else {
+ assert(isStructTy());
static_cast<const StructType*>(this)->StructType::~StructType();
- else
- static_cast<const UnionType*>(this)->UnionType::~UnionType();
+ }
// Finally, remove the memory as an array deallocation of the chars it was
// constructed from.
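// Context for the destroy() logic above: FunctionType and StructType place
// their PATypeHandle array in the same heap block as the type object, so
// teardown must destruct the trailing elements by hand before freeing the
// raw block. A generic sketch of that pattern (Node/Elem are illustrative):
#include <new>

struct Elem { ~Elem() {} };
struct Node {
  unsigned NumElems;
  Elem *Elems;                       // points just past the Node header
};

Node *createNode(unsigned N) {
  void *Mem = operator new(sizeof(Node) + sizeof(Elem) * N);
  Node *Hdr = new (Mem) Node();
  Hdr->NumElems = N;
  Hdr->Elems = reinterpret_cast<Elem *>(Hdr + 1);
  for (unsigned i = 0; i != N; ++i)
    new (&Hdr->Elems[i]) Elem();     // placement-construct each element
  return Hdr;
}

void destroyNode(Node *Hdr) {        // mirrors Type::destroy()
  for (unsigned i = 0, e = Hdr->NumElems; i != e; ++i)
    Hdr->Elems[i].~Elem();           // manual destruction, as in the diff
  Hdr->~Node();
  operator delete(Hdr);              // free the block as raw memory
}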
@@ -234,7 +233,7 @@ bool Type::isSizedDerivedType() const {
if (const VectorType *PTy = dyn_cast<VectorType>(this))
return PTy->getElementType()->isSized();
- if (!this->isStructTy() && !this->isUnionTy())
+ if (!this->isStructTy())
return false;
// Okay, our struct is sized if all of the elements are...
@@ -319,31 +318,6 @@ const Type *StructType::getTypeAtIndex(unsigned Idx) const {
}
-bool UnionType::indexValid(const Value *V) const {
- // Union indexes require 32-bit integer constants.
- if (V->getType()->isIntegerTy(32))
- if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
- return indexValid(CU->getZExtValue());
- return false;
-}
-
-bool UnionType::indexValid(unsigned V) const {
- return V < NumContainedTys;
-}
-
-// getTypeAtIndex - Given an index value into the type, return the type of the
-// element. For a structure type, this must be a constant value...
-//
-const Type *UnionType::getTypeAtIndex(const Value *V) const {
- unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
- return getTypeAtIndex(Idx);
-}
-
-const Type *UnionType::getTypeAtIndex(unsigned Idx) const {
- assert(indexValid(Idx) && "Invalid structure index!");
- return ContainedTys[Idx];
-}
-
//===----------------------------------------------------------------------===//
// Primitive 'Type' data
//===----------------------------------------------------------------------===//
@@ -455,8 +429,8 @@ const PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
/// isValidReturnType - Return true if the specified type is valid as a return
/// type.
bool FunctionType::isValidReturnType(const Type *RetTy) {
- return RetTy->getTypeID() != LabelTyID &&
- RetTy->getTypeID() != MetadataTyID;
+ return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
+ !RetTy->isMetadataTy();
}
/// isValidArgumentType - Return true if the specified type is valid as an
@@ -507,23 +481,6 @@ StructType::StructType(LLVMContext &C,
setAbstract(isAbstract);
}
-UnionType::UnionType(LLVMContext &C,const Type* const* Types, unsigned NumTypes)
- : CompositeType(C, UnionTyID) {
- ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
- NumContainedTys = NumTypes;
- bool isAbstract = false;
- for (unsigned i = 0; i < NumTypes; ++i) {
- assert(Types[i] && "<null> type for union field!");
- assert(isValidElementType(Types[i]) &&
- "Invalid type for union element!");
- new (&ContainedTys[i]) PATypeHandle(Types[i], this);
- isAbstract |= Types[i]->isAbstract();
- }
-
- // Calculate whether or not this type is abstract
- setAbstract(isAbstract);
-}
-
ArrayType::ArrayType(const Type *ElType, uint64_t NumEl)
: SequentialType(ArrayTyID, ElType) {
NumElements = NumEl;
@@ -603,8 +560,8 @@ namespace llvm {
static inline ChildIteratorType child_begin(NodeType *N) {
if (N->isAbstract())
return N->subtype_begin();
- else // No need to process children of concrete types.
- return N->subtype_end();
+ // No need to process children of concrete types.
+ return N->subtype_end();
}
static inline ChildIteratorType child_end(NodeType *N) {
return N->subtype_end();
@@ -627,35 +584,35 @@ void Type::PromoteAbstractToConcrete() {
// Concrete types are leaves in the tree. Since an SCC will either be all
// abstract or all concrete, we only need to check one type.
- if (SCC[0]->isAbstract()) {
- if (SCC[0]->isOpaqueTy())
- return; // Not going to be concrete, sorry.
-
- // If all of the children of all of the types in this SCC are concrete,
- // then this SCC is now concrete as well. If not, neither this SCC, nor
- // any parent SCCs will be concrete, so we might as well just exit.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- for (Type::subtype_iterator CI = SCC[i]->subtype_begin(),
- E = SCC[i]->subtype_end(); CI != E; ++CI)
- if ((*CI)->isAbstract())
- // If the child type is in our SCC, it doesn't make the entire SCC
- // abstract unless there is a non-SCC abstract type.
- if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end())
- return; // Not going to be concrete, sorry.
-
- // Okay, we just discovered this whole SCC is now concrete, mark it as
- // such!
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- assert(SCC[i]->isAbstract() && "Why are we processing concrete types?");
-
- SCC[i]->setAbstract(false);
- }
-
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- assert(!SCC[i]->isAbstract() && "Concrete type became abstract?");
- // The type just became concrete, notify all users!
- cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete();
- }
+ if (!SCC[0]->isAbstract()) continue;
+
+ if (SCC[0]->isOpaqueTy())
+ return; // Not going to be concrete, sorry.
+
+ // If all of the children of all of the types in this SCC are concrete,
+ // then this SCC is now concrete as well. If not, neither this SCC, nor
+ // any parent SCCs will be concrete, so we might as well just exit.
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i)
+ for (Type::subtype_iterator CI = SCC[i]->subtype_begin(),
+ E = SCC[i]->subtype_end(); CI != E; ++CI)
+ if ((*CI)->isAbstract())
+ // If the child type is in our SCC, it doesn't make the entire SCC
+ // abstract unless there is a non-SCC abstract type.
+ if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end())
+ return; // Not going to be concrete, sorry.
+
+ // Okay, we just discovered this whole SCC is now concrete, mark it as
+ // such!
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ assert(SCC[i]->isAbstract() && "Why are we processing concrete types?");
+
+ SCC[i]->setAbstract(false);
+ }
+
+ for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
+ assert(!SCC[i]->isAbstract() && "Concrete type became abstract?");
+ // The type just became concrete, notify all users!
+ cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete();
}
}
}
@@ -693,11 +650,15 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
const IntegerType *ITy2 = cast<IntegerType>(Ty2);
return ITy->getBitWidth() == ITy2->getBitWidth();
- } else if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ }
+
+ if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
const PointerType *PTy2 = cast<PointerType>(Ty2);
return PTy->getAddressSpace() == PTy2->getAddressSpace() &&
TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
- } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ }
+
+ if (const StructType *STy = dyn_cast<StructType>(Ty)) {
const StructType *STy2 = cast<StructType>(Ty2);
if (STy->getNumElements() != STy2->getNumElements()) return false;
if (STy->isPacked() != STy2->isPacked()) return false;
@@ -705,22 +666,21 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes))
return false;
return true;
- } else if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
- const UnionType *UTy2 = cast<UnionType>(Ty2);
- if (UTy->getNumElements() != UTy2->getNumElements()) return false;
- for (unsigned i = 0, e = UTy2->getNumElements(); i != e; ++i)
- if (!TypesEqual(UTy->getElementType(i), UTy2->getElementType(i), EqTypes))
- return false;
- return true;
- } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ }
+
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
const ArrayType *ATy2 = cast<ArrayType>(Ty2);
return ATy->getNumElements() == ATy2->getNumElements() &&
TypesEqual(ATy->getElementType(), ATy2->getElementType(), EqTypes);
- } else if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) {
+ }
+
+ if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) {
const VectorType *PTy2 = cast<VectorType>(Ty2);
return PTy->getNumElements() == PTy2->getNumElements() &&
TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
- } else if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
+ }
+
+ if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
const FunctionType *FTy2 = cast<FunctionType>(Ty2);
if (FTy->isVarArg() != FTy2->isVarArg() ||
FTy->getNumParams() != FTy2->getNumParams() ||
@@ -731,10 +691,10 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
return false;
}
return true;
- } else {
- llvm_unreachable("Unknown derived type!");
- return false;
}
+
+ llvm_unreachable("Unknown derived type!");
+ return false;
}
namespace llvm { // in namespace llvm so findable by ADL
@@ -808,13 +768,13 @@ const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
// Check for the built-in integer types
switch (NumBits) {
- case 1: return cast<IntegerType>(Type::getInt1Ty(C));
- case 8: return cast<IntegerType>(Type::getInt8Ty(C));
- case 16: return cast<IntegerType>(Type::getInt16Ty(C));
- case 32: return cast<IntegerType>(Type::getInt32Ty(C));
- case 64: return cast<IntegerType>(Type::getInt64Ty(C));
- default:
- break;
+ case 1: return cast<IntegerType>(Type::getInt1Ty(C));
+ case 8: return cast<IntegerType>(Type::getInt8Ty(C));
+ case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+ case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+ case 64: return cast<IntegerType>(Type::getInt64Ty(C));
+ default:
+ break;
}
LLVMContextImpl *pImpl = C.pImpl;
@@ -902,8 +862,8 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
}
bool ArrayType::isValidElementType(const Type *ElemTy) {
- return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID &&
- ElemTy->getTypeID() != MetadataTyID && !ElemTy->isFunctionTy();
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
}
VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
@@ -975,60 +935,6 @@ bool StructType::isValidElementType(const Type *ElemTy) {
//===----------------------------------------------------------------------===//
-// Union Type Factory...
-//
-
-UnionType *UnionType::get(const Type* const* Types, unsigned NumTypes) {
- assert(NumTypes > 0 && "union must have at least one member type!");
- UnionValType UTV(Types, NumTypes);
- UnionType *UT = 0;
-
- LLVMContextImpl *pImpl = Types[0]->getContext().pImpl;
-
- UT = pImpl->UnionTypes.get(UTV);
-
- if (!UT) {
- // Value not found. Derive a new type!
- UT = (UnionType*) operator new(sizeof(UnionType) +
- sizeof(PATypeHandle) * NumTypes);
- new (UT) UnionType(Types[0]->getContext(), Types, NumTypes);
- pImpl->UnionTypes.add(UTV, UT);
- }
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "Derived new type: " << *UT << "\n");
-#endif
- return UT;
-}
-
-UnionType *UnionType::get(const Type *type, ...) {
- va_list ap;
- SmallVector<const llvm::Type*, 8> UnionFields;
- va_start(ap, type);
- while (type) {
- UnionFields.push_back(type);
- type = va_arg(ap, llvm::Type*);
- }
- unsigned NumTypes = UnionFields.size();
- assert(NumTypes > 0 && "union must have at least one member type!");
- return llvm::UnionType::get(&UnionFields[0], NumTypes);
-}
-
-bool UnionType::isValidElementType(const Type *ElemTy) {
- return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
-}
-
-int UnionType::getElementTypeIndex(const Type *ElemTy) const {
- int index = 0;
- for (UnionType::element_iterator I = element_begin(), E = element_end();
- I != E; ++I, ++index) {
- if (ElemTy == *I) return index;
- }
-
- return -1;
-}
-
-//===----------------------------------------------------------------------===//
// Pointer Type Factory...
//
@@ -1060,9 +966,8 @@ const PointerType *Type::getPointerTo(unsigned addrs) const {
}
bool PointerType::isValidElementType(const Type *ElemTy) {
- return ElemTy->getTypeID() != VoidTyID &&
- ElemTy->getTypeID() != LabelTyID &&
- ElemTy->getTypeID() != MetadataTyID;
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy();
}
@@ -1071,8 +976,7 @@ bool PointerType::isValidElementType(const Type *ElemTy) {
//
OpaqueType *OpaqueType::get(LLVMContext &C) {
- OpaqueType *OT = new OpaqueType(C); // All opaque types are distinct
-
+ OpaqueType *OT = new OpaqueType(C); // All opaque types are distinct.
LLVMContextImpl *pImpl = C.pImpl;
pImpl->OpaqueTypes.insert(OT);
return OT;
@@ -1123,18 +1027,17 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
<< ">[" << (void*)this << "]" << "\n");
#endif
- this->destroy();
+ this->destroy();
}
-
}
-// unlockedRefineAbstractTypeTo - This function is used when it is discovered
+// refineAbstractTypeTo - This function is used when it is discovered
// that the 'this' abstract type is actually equivalent to the NewType
// specified. This causes all users of 'this' to switch to reference the more
// concrete type NewType and for 'this' to be deleted. Only used for internal
// callers.
//
-void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
+void DerivedType::refineAbstractTypeTo(const Type *NewType) {
assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
assert(this != NewType && "Can't refine to myself!");
assert(ForwardType == 0 && "This type has already been refined!");
@@ -1199,15 +1102,6 @@ void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
// destroyed.
}
-// refineAbstractTypeTo - This function is used by external callers to notify
-// us that this abstract type is equivalent to another type.
-//
-void DerivedType::refineAbstractTypeTo(const Type *NewType) {
- // All recursive calls will go through unlockedRefineAbstractTypeTo,
- // to avoid deadlock problems.
- unlockedRefineAbstractTypeTo(NewType);
-}
-
// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
// the current type has transitioned from being abstract to being concrete.
//
@@ -1291,21 +1185,6 @@ void StructType::typeBecameConcrete(const DerivedType *AbsTy) {
// concrete - this could potentially change us from an abstract type to a
// concrete type.
//
-void UnionType::refineAbstractType(const DerivedType *OldType,
- const Type *NewType) {
- LLVMContextImpl *pImpl = OldType->getContext().pImpl;
- pImpl->UnionTypes.RefineAbstractType(this, OldType, NewType);
-}
-
-void UnionType::typeBecameConcrete(const DerivedType *AbsTy) {
- LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
- pImpl->UnionTypes.TypeBecameConcrete(this, AbsTy);
-}
-
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
void PointerType::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
LLVMContextImpl *pImpl = OldType->getContext().pImpl;
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
index 02ab1135b32cf..5a90917977b02 100644
--- a/lib/VMCore/TypesContext.h
+++ b/lib/VMCore/TypesContext.h
@@ -180,32 +180,6 @@ public:
}
};
-// UnionValType - Define a class to hold the key that goes into the TypeMap
-//
-class UnionValType {
- std::vector<const Type*> ElTypes;
-public:
- UnionValType(const Type* const* Types, unsigned NumTypes)
- : ElTypes(&Types[0], &Types[NumTypes]) {}
-
- static UnionValType get(const UnionType *UT) {
- std::vector<const Type *> ElTypes;
- ElTypes.reserve(UT->getNumElements());
- for (unsigned i = 0, e = UT->getNumElements(); i != e; ++i)
- ElTypes.push_back(UT->getElementType(i));
-
- return UnionValType(&ElTypes[0], ElTypes.size());
- }
-
- static unsigned hashTypeStructure(const UnionType *UT) {
- return UT->getNumElements();
- }
-
- inline bool operator<(const UnionValType &UTV) const {
- return (ElTypes < UTV.ElTypes);
- }
-};
-
// FunctionValType - Define a class to hold the key that goes into the TypeMap
//
class FunctionValType {
@@ -370,7 +344,7 @@ public:
// We already have this type in the table. Get rid of the newly refined
// type.
TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
- Ty->unlockedRefineAbstractTypeTo(NewTy);
+ Ty->refineAbstractTypeTo(NewTy);
return;
}
} else {
@@ -385,31 +359,33 @@ public:
if (I->second == Ty) {
// Remember the position of the old type if we see it in our scan.
Entry = I;
+ continue;
+ }
+
+ if (!TypesEqual(Ty, I->second))
+ continue;
+
+ TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
+
+      // Remove the old entry from TypesByHash. If the hash values differ
+      // now, remove it from the old place. Otherwise, continue scanning
+      // within this hashcode to reduce work.
+ if (NewTypeHash != OldTypeHash) {
+ RemoveFromTypesByHash(OldTypeHash, Ty);
} else {
- if (TypesEqual(Ty, I->second)) {
- TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-
- // Remove the old entry form TypesByHash. If the hash values differ
- // now, remove it from the old place. Otherwise, continue scanning
- // withing this hashcode to reduce work.
- if (NewTypeHash != OldTypeHash) {
- RemoveFromTypesByHash(OldTypeHash, Ty);
- } else {
- if (Entry == E) {
- // Find the location of Ty in the TypesByHash structure if we
- // haven't seen it already.
- while (I->second != Ty) {
- ++I;
- assert(I != E && "Structure doesn't contain type??");
- }
- Entry = I;
- }
- TypesByHash.erase(Entry);
+ if (Entry == E) {
+ // Find the location of Ty in the TypesByHash structure if we
+ // haven't seen it already.
+ while (I->second != Ty) {
+ ++I;
+ assert(I != E && "Structure doesn't contain type??");
}
- Ty->unlockedRefineAbstractTypeTo(NewTy);
- return;
+ Entry = I;
}
+ TypesByHash.erase(Entry);
}
+ Ty->refineAbstractTypeTo(NewTy);
+ return;
}
// If there is no existing type of the same structure, we reinsert an
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index b7fd92f9b0660..fec710b39459b 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -86,14 +86,27 @@ const Use *Use::getImpliedUser() const {
//===----------------------------------------------------------------------===//
Use *Use::initTags(Use * const Start, Use *Stop, ptrdiff_t Done) {
+ while (Done < 20) {
+ if (Start == Stop--)
+ return Start;
+ static const PrevPtrTag tags[20] = { fullStopTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, stopTag,
+ zeroDigitTag, oneDigitTag, oneDigitTag,
+ stopTag, zeroDigitTag, oneDigitTag,
+ zeroDigitTag, oneDigitTag, stopTag,
+ oneDigitTag, oneDigitTag, oneDigitTag,
+ oneDigitTag, stopTag
+ };
+ Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(tags[Done++]));
+ Stop->Val = 0;
+ }
+
ptrdiff_t Count = Done;
while (Start != Stop) {
--Stop;
Stop->Val = 0;
if (!Count) {
- Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(Done == 0
- ? fullStopTag
- : stopTag));
+ Stop->Prev.setFromOpaqueValue(reinterpret_cast<Use**>(stopTag));
++Done;
Count = Done;
} else {
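// The 20-entry table added above fast-paths LLVM's "waymarking": each Use's
// Prev pointer carries a 2-bit tag, and the tags, read from the back of the
// operand array, spell out where the owning User lives. A toy sketch of the
// table-driven stamping only (enum values illustrative, not the real bits):
enum WaymarkTag { FullStop, OneDigit, Stop, ZeroDigit };

void stampTags(WaymarkTag *T, unsigned N) {
  static const WaymarkTag Precomputed[20] = {
    FullStop,  OneDigit,  Stop,      OneDigit, OneDigit,
    Stop,      ZeroDigit, OneDigit,  OneDigit, Stop,
    ZeroDigit, OneDigit,  ZeroDigit, OneDigit, Stop,
    OneDigit,  OneDigit,  OneDigit,  OneDigit, Stop
  };
  for (unsigned i = 0; i != N && i != 20; ++i)
    T[N - 1 - i] = Precomputed[i];   // stamped back-to-front, as in initTags
  // (the general digit-computing loop would handle i >= 20)
}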
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 585edf09c9e5a..b8c677565467a 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -139,10 +139,6 @@ static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
} else if (Argument *A = dyn_cast<Argument>(V)) {
if (Function *P = A->getParent())
ST = &P->getValueSymbolTable();
- } else if (NamedMDNode *N = dyn_cast<NamedMDNode>(V)) {
- if (Module *P = N->getParent()) {
- ST = &P->getValueSymbolTable();
- }
} else if (isa<MDString>(V))
return true;
else {
@@ -492,10 +488,15 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
ValueHandleBase *Entry = pImpl->ValueHandles[V];
assert(Entry && "Value bit set but no entries exist");
- // We use a local ValueHandleBase as an iterator so that
- // ValueHandles can add and remove themselves from the list without
- // breaking our iteration. This is not really an AssertingVH; we
- // just have to give ValueHandleBase some kind.
+ // We use a local ValueHandleBase as an iterator so that ValueHandles can add
+ // and remove themselves from the list without breaking our iteration. This
+ // is not really an AssertingVH; we just have to give ValueHandleBase a kind.
+  // Note that we deliberately do not support the case when dropping a value
+ // handle results in a new value handle being permanently added to the list
+ // (as might occur in theory for CallbackVH's): the new value handle will not
+ // be processed and the checking code will mete out righteous punishment if
+ // the handle is still present once we have finished processing all the other
+ // value handles (it is fine to momentarily add then remove a value handle).
for (ValueHandleBase Iterator(Assert, *Entry); Entry; Entry = Iterator.Next) {
Iterator.RemoveFromUseList();
Iterator.AddToExistingUseListAfter(Entry);
@@ -576,6 +577,24 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
break;
}
}
+
+#ifndef NDEBUG
+ // If any new tracking or weak value handles were added while processing the
+ // list, then complain about it now.
+ if (Old->HasValueHandle)
+ for (Entry = pImpl->ValueHandles[Old]; Entry; Entry = Entry->Next)
+ switch (Entry->getKind()) {
+ case Tracking:
+ case Weak:
+ dbgs() << "After RAUW from " << *Old->getType() << " %"
+ << Old->getNameStr() << " to " << *New->getType() << " %"
+ << New->getNameStr() << "\n";
+ llvm_unreachable("A tracking or weak value handle still pointed to the"
+ " old value!\n");
+ default:
+ break;
+ }
+#endif
}
/// ~CallbackVH. Empty, but defined here to avoid emitting the vtable
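// The NDEBUG block above makes the handle contract loud: after a RAUW, no
// Tracking or Weak handle may still reference the old value, because both
// kinds are rewritten in the switch earlier in this function. A hedged
// sketch of the behavior being asserted (Old/New are illustrative):
#include "llvm/Support/ValueHandle.h"
#include <cassert>
using namespace llvm;

void demoRAUW(Value *Old, Value *New) {
  WeakVH Handle(Old);              // registers Handle on Old's handle list
  Old->replaceAllUsesWith(New);    // runs ValueHandleBase::ValueIsRAUWd
  assert(Handle == New && "weak handle should have followed the RAUW");
}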
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index 449d61a2cbb18..254bf06439d9b 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -115,5 +115,3 @@ void ValueSymbolTable::dump() const {
//DEBUG(dbgs() << "\n");
}
}
-
-MDSymbolTable::~MDSymbolTable() { }
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index f97699dabd890..e3ecc979bf128 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -72,7 +72,7 @@ namespace { // Anonymous namespace for class
struct PreVerifier : public FunctionPass {
static char ID; // Pass ID, replacement for typeid
- PreVerifier() : FunctionPass(&ID) { }
+ PreVerifier() : FunctionPass(ID) { }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -102,9 +102,9 @@ namespace { // Anonymous namespace for class
}
char PreVerifier::ID = 0;
-static RegisterPass<PreVerifier>
-PreVer("preverify", "Preliminary module verification");
-static const PassInfo *const PreVerifyID = &PreVer;
+INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
+ false, false);
+char &PreVerifyID = PreVerifier::ID;
namespace {
class TypeSet : public AbstractTypeUser {
@@ -182,23 +182,13 @@ namespace {
SmallPtrSet<MDNode *, 32> MDNodes;
Verifier()
- : FunctionPass(&ID),
+ : FunctionPass(ID),
Broken(false), RealPass(true), action(AbortProcessAction),
Mod(0), Context(0), DT(0), MessagesStr(Messages) {}
explicit Verifier(VerifierFailureAction ctn)
- : FunctionPass(&ID),
+ : FunctionPass(ID),
Broken(false), RealPass(true), action(ctn), Mod(0), Context(0), DT(0),
MessagesStr(Messages) {}
- explicit Verifier(bool AB)
- : FunctionPass(&ID),
- Broken(false), RealPass(true),
- action( AB ? AbortProcessAction : PrintMessageAction), Mod(0),
- Context(0), DT(0), MessagesStr(Messages) {}
- explicit Verifier(DominatorTree &dt)
- : FunctionPass(&ID),
- Broken(false), RealPass(false), action(PrintMessageAction), Mod(0),
- Context(0), DT(&dt), MessagesStr(Messages) {}
-
bool doInitialization(Module &M) {
Mod = &M;
@@ -331,6 +321,7 @@ namespace {
void visitBranchInst(BranchInst &BI);
void visitReturnInst(ReturnInst &RI);
void visitSwitchInst(SwitchInst &SI);
+ void visitIndirectBrInst(IndirectBrInst &BI);
void visitSelectInst(SelectInst &SI);
void visitUserOp1(Instruction &I);
void visitUserOp2(Instruction &I) { visitUserOp1(I); }
@@ -402,7 +393,7 @@ namespace {
} // End anonymous namespace
char Verifier::ID = 0;
-static RegisterPass<Verifier> X("verify", "Module Verifier");
+INITIALIZE_PASS(Verifier, "verify", "Module Verifier", false, false);
// Assert - We know that cond should be true, if not print an error message.
#define Assert(C, M) \
@@ -445,6 +436,10 @@ void Verifier::visitGlobalValue(GlobalValue &GV) {
Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
"Only global arrays can have appending linkage!", GVar);
}
+
+ Assert1(!GV.hasLinkerPrivateWeakDefAutoLinkage() || GV.hasDefaultVisibility(),
+ "linker_private_weak_def_auto can only have default visibility!",
+ &GV);
}
void Verifier::visitGlobalVariable(GlobalVariable &GV) {
@@ -504,8 +499,8 @@ void Verifier::visitNamedMDNode(NamedMDNode &NMD) {
if (!MD)
continue;
- Assert2(!MD->isFunctionLocal(),
- "Named metadata operand cannot be function local!", &NMD, MD);
+ Assert1(!MD->isFunctionLocal(),
+ "Named metadata operand cannot be function local!", MD);
visitMDNode(*MD, 0);
}
}
@@ -520,7 +515,7 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) {
Value *Op = MD.getOperand(i);
if (!Op)
continue;
- if (isa<Constant>(Op) || isa<MDString>(Op) || isa<NamedMDNode>(Op))
+ if (isa<Constant>(Op) || isa<MDString>(Op))
continue;
if (MDNode *N = dyn_cast<MDNode>(Op)) {
Assert2(MD.isFunctionLocal() || !N->isFunctionLocal(),
@@ -864,6 +859,16 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
visitTerminatorInst(SI);
}
+void Verifier::visitIndirectBrInst(IndirectBrInst &BI) {
+ Assert1(BI.getAddress()->getType()->isPointerTy(),
+ "Indirectbr operand must have pointer type!", &BI);
+ for (unsigned i = 0, e = BI.getNumDestinations(); i != e; ++i)
+ Assert1(BI.getDestination(i)->getType()->isLabelTy(),
+ "Indirectbr destinations must all have pointer type!", &BI);
+
+ visitTerminatorInst(BI);
+}
+
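// For reference, the construct the new hook verifies: indirectbr consumes a
// block-address value and carries an explicit destination list of labels. A
// minimal builder sketch (function and block names are illustrative):
#include "llvm/Support/IRBuilder.h"
using namespace llvm;

void emitIndirectBr(IRBuilder<> &B, Function *F,
                    BasicBlock *D1, BasicBlock *D2) {
  Value *Addr = BlockAddress::get(F, D1);  // i8* pointing at a label
  IndirectBrInst *IBI = B.CreateIndirectBr(Addr, /*NumDests=*/2);
  IBI->addDestination(D1);                 // each destination is a label
  IBI->addDestination(D2);
}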
void Verifier::visitSelectInst(SelectInst &SI) {
Assert1(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1),
SI.getOperand(2)),
@@ -1202,6 +1207,7 @@ void Verifier::visitCallInst(CallInst &CI) {
void Verifier::visitInvokeInst(InvokeInst &II) {
VerifyCallSite(&II);
+ visitTerminatorInst(II);
}
/// visitBinaryOperator - Check that both arguments to the binary operator are
@@ -1266,28 +1272,37 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) {
visitInstruction(B);
}
-void Verifier::visitICmpInst(ICmpInst& IC) {
+void Verifier::visitICmpInst(ICmpInst &IC) {
// Check that the operands are the same type
- const Type* Op0Ty = IC.getOperand(0)->getType();
- const Type* Op1Ty = IC.getOperand(1)->getType();
+ const Type *Op0Ty = IC.getOperand(0)->getType();
+ const Type *Op1Ty = IC.getOperand(1)->getType();
Assert1(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
"Invalid operand types for ICmp instruction", &IC);
+ // Check that the predicate is valid.
+ Assert1(IC.getPredicate() >= CmpInst::FIRST_ICMP_PREDICATE &&
+ IC.getPredicate() <= CmpInst::LAST_ICMP_PREDICATE,
+ "Invalid predicate in ICmp instruction!", &IC);
visitInstruction(IC);
}
-void Verifier::visitFCmpInst(FCmpInst& FC) {
+void Verifier::visitFCmpInst(FCmpInst &FC) {
// Check that the operands are the same type
- const Type* Op0Ty = FC.getOperand(0)->getType();
- const Type* Op1Ty = FC.getOperand(1)->getType();
+ const Type *Op0Ty = FC.getOperand(0)->getType();
+ const Type *Op1Ty = FC.getOperand(1)->getType();
Assert1(Op0Ty == Op1Ty,
"Both operands to FCmp instruction are not of the same type!", &FC);
// Check that the operands are the right type
Assert1(Op0Ty->isFPOrFPVectorTy(),
"Invalid operand types for FCmp instruction", &FC);
+ // Check that the predicate is valid.
+ Assert1(FC.getPredicate() >= CmpInst::FIRST_FCMP_PREDICATE &&
+ FC.getPredicate() <= CmpInst::LAST_FCMP_PREDICATE,
+ "Invalid predicate in FCmp instruction!", &FC);
+
visitInstruction(FC);
}
@@ -1310,27 +1325,6 @@ void Verifier::visitShuffleVectorInst(ShuffleVectorInst &SV) {
Assert1(ShuffleVectorInst::isValidOperands(SV.getOperand(0), SV.getOperand(1),
SV.getOperand(2)),
"Invalid shufflevector operands!", &SV);
-
- const VectorType *VTy = dyn_cast<VectorType>(SV.getOperand(0)->getType());
- Assert1(VTy, "Operands are not a vector type", &SV);
-
- // Check to see if Mask is valid.
- if (const ConstantVector *MV = dyn_cast<ConstantVector>(SV.getOperand(2))) {
- for (unsigned i = 0, e = MV->getNumOperands(); i != e; ++i) {
- if (ConstantInt* CI = dyn_cast<ConstantInt>(MV->getOperand(i))) {
- Assert1(!CI->uge(VTy->getNumElements()*2),
- "Invalid shufflevector shuffle mask!", &SV);
- } else {
- Assert1(isa<UndefValue>(MV->getOperand(i)),
- "Invalid shufflevector shuffle mask!", &SV);
- }
- }
- } else {
- Assert1(isa<UndefValue>(SV.getOperand(2)) ||
- isa<ConstantAggregateZero>(SV.getOperand(2)),
- "Invalid shufflevector shuffle mask!", &SV);
- }
-
visitInstruction(SV);
}
@@ -1408,10 +1402,6 @@ void Verifier::visitInstruction(Instruction &I) {
"Only PHI nodes may reference their own value!", &I);
}
- // Verify that if this is a terminator that it is at the end of the block.
- if (isa<TerminatorInst>(I))
- Assert1(BB->getTerminator() == &I, "Terminator not at end of block!", &I);
-
// Check that void typed values don't have names
Assert1(!I.getType()->isVoidTy() || !I.hasName(),
"Instruction has a name, but provides a void value!", &I);
@@ -1570,7 +1560,8 @@ void Verifier::VerifyType(const Type *Ty) {
"Function type with invalid parameter type", ElTy, FTy);
VerifyType(ElTy);
}
- } break;
+ break;
+ }
case Type::StructTyID: {
const StructType *STy = cast<StructType>(Ty);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
@@ -1579,34 +1570,29 @@ void Verifier::VerifyType(const Type *Ty) {
"Structure type with invalid element type", ElTy, STy);
VerifyType(ElTy);
}
- } break;
- case Type::UnionTyID: {
- const UnionType *UTy = cast<UnionType>(Ty);
- for (unsigned i = 0, e = UTy->getNumElements(); i != e; ++i) {
- const Type *ElTy = UTy->getElementType(i);
- Assert2(UnionType::isValidElementType(ElTy),
- "Union type with invalid element type", ElTy, UTy);
- VerifyType(ElTy);
- }
- } break;
+ break;
+ }
case Type::ArrayTyID: {
const ArrayType *ATy = cast<ArrayType>(Ty);
Assert1(ArrayType::isValidElementType(ATy->getElementType()),
"Array type with invalid element type", ATy);
VerifyType(ATy->getElementType());
- } break;
+ break;
+ }
case Type::PointerTyID: {
const PointerType *PTy = cast<PointerType>(Ty);
Assert1(PointerType::isValidElementType(PTy->getElementType()),
"Pointer type with invalid element type", PTy);
VerifyType(PTy->getElementType());
- } break;
+ break;
+ }
case Type::VectorTyID: {
const VectorType *VTy = cast<VectorType>(Ty);
Assert1(VectorType::isValidElementType(VTy->getElementType()),
"Vector type with invalid element type", VTy);
VerifyType(VTy->getElementType());
- } break;
+ break;
+ }
default:
break;
}
@@ -1832,8 +1818,13 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
// and iPTR. In the verifier, we can not distinguish which case we have so
// allow either case to be legal.
if (const PointerType* PTyp = dyn_cast<PointerType>(Ty)) {
- Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
- EVT::getEVT(PTyp->getElementType()).getEVTString();
+ EVT PointeeVT = EVT::getEVT(PTyp->getElementType(), true);
+ if (PointeeVT == MVT::Other) {
+ CheckFailed("Intrinsic has pointer to complex type.");
+ return false;
+ }
+ Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
+ PointeeVT.getEVTString();
} else {
CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a "
"pointer and a pointer is required.", F);